Updated beets library to 1.0b11

This commit is contained in:
Remy Varma
2011-11-03 22:03:03 +00:00
parent 0ba1e578ab
commit 5cf995b3dc
27 changed files with 8689 additions and 1059 deletions
+2 -3
View File
@@ -12,8 +12,7 @@
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
__version__ = '1.0b9'
__version__ = '1.0b11'
__author__ = 'Adrian Sampson <adrian@radbox.org>'
from lib.beets import library
Library = library.Library
from lib.beets.library import Library
+19
View File
@@ -0,0 +1,19 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# Package metadata: release version string and upstream author.
__version__ = '1.0b11'
__author__ = 'Adrian Sampson <adrian@radbox.org>'
# Re-export the Library class so it is available directly on the package.
import lib.beets.library as beetslibrary
Library = beetslibrary.Library
+46 -535
View File
@@ -16,80 +16,23 @@
"""
import os
import logging
from collections import defaultdict
import re
from lib.munkres import Munkres
# from lib.unidecode import unidecode
from lib.beets.autotag import mb
from lib.beets import library, mediafile, plugins
from lib.beets.util import levenshtein, sorted_walk
from lib.beets import library, mediafile
from lib.beets.util import sorted_walk
# Try 5 releases. In the future, this should be more dynamic: let the
# probability of continuing to the next release be inversely
# proportional to how good our current best is and how long we've
# already taken.
MAX_CANDIDATES = 5
# Distance parameters.
# Text distance weights: proportions on the normalized intuitive edit
# distance.
ARTIST_WEIGHT = 3.0
ALBUM_WEIGHT = 3.0
# The weight of the entire distance calculated for a given track.
TRACK_WEIGHT = 1.0
# These distances are components of the track distance (that is, they
# compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT;
# the overall TRACK_WEIGHT does that).
TRACK_TITLE_WEIGHT = 3.0
# Used instead of a global artist penalty for various-artist matches.
TRACK_ARTIST_WEIGHT = 2.0
# Added when the indices of tracks don't match.
TRACK_INDEX_WEIGHT = 1.0
# Track length weights: no penalty before GRACE, maximum (WEIGHT)
# penalty at GRACE+MAX discrepancy.
TRACK_LENGTH_GRACE = 10
TRACK_LENGTH_MAX = 30
TRACK_LENGTH_WEIGHT = 2.0
# MusicBrainz track ID matches.
TRACK_ID_WEIGHT = 5.0
# Recommendation constants.
RECOMMEND_STRONG = 'RECOMMEND_STRONG'
RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM'
RECOMMEND_NONE = 'RECOMMEND_NONE'
# Thresholds for recommendations.
STRONG_REC_THRESH = 0.04
MEDIUM_REC_THRESH = 0.25
REC_GAP_THRESH = 0.25
# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
SD_END_WORDS = ['the', 'a', 'an']
# Reduced weights for certain portions of the string.
SD_PATTERNS = [
(r'^the ', 0.1),
(r'[\[\(]?(ep|single)[\]\)]?', 0.0),
(r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
(r'\(.*?\)', 0.3),
(r'\[.*?\]', 0.3),
(r'(, )?(pt\.|part) .+', 0.2),
]
# Replacements to use before testing distance.
SD_REPLACE = [
(r'&', 'and'),
]
# Artist signals that indicate "various artists".
VA_ARTISTS = (u'', u'various artists', u'va', u'unknown')
# Autotagging exceptions.
class AutotagError(Exception):
    """Exception type used by the autotagging routines."""
# Parts of external interface.
from .hooks import AlbumInfo, TrackInfo
from .match import AutotagError
from .match import tag_item, tag_album
from .match import RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE
from .match import STRONG_REC_THRESH, MEDIUM_REC_THRESH, REC_GAP_THRESH
# Global logger.
log = logging.getLogger('beets')
# Additional utilities for the main interface.
def albums_in_dir(path):
"""Recursively searches the given directory and returns an iterable
of (path, items) where path is a containing directory and items is
@@ -113,488 +56,56 @@ def albums_in_dir(path):
if items:
yield root, items
def _string_dist_basic(str1, str2):
"""Basic edit distance between two strings, ignoring
non-alphanumeric characters and case. Comparisons are based on a
transliteration/lowering to ASCII characters. Normalized by string
length.
def apply_item_metadata(item, track_info):
"""Set an item's metadata from its matched TrackInfo object.
"""
# str1 = unidecode(str1)
# str2 = unidecode(str2)
str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
if not str1 and not str2:
return 0.0
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
def string_dist(str1, str2):
    """Return an "intuitive" edit distance between two strings.

    The basic measure is the normalized edit distance computed by
    ``_string_dist_basic``. On top of that, a few text conventions are
    taken into account: a trailing ", the"/", a"/", an" is moved back to
    the front, the substitutions in SD_REPLACE are applied, and the
    portions matched by SD_PATTERNS only count with a reduced weight.
    """
    str1 = str1.lower()
    str2 = str2.lower()

    # "something, the" is treated like "the something".
    for word in SD_END_WORDS:
        suffix = ', %s' % word
        if str1.endswith(suffix):
            str1 = '%s %s' % (word, str1[:-len(suffix)])
        if str2.endswith(suffix):
            str2 = '%s %s' % (word, str2[:-len(suffix)])

    # Basic normalizing substitutions.
    for pat, repl in SD_REPLACE:
        str1, str2 = re.sub(pat, repl, str1), re.sub(pat, repl, str2)

    # Strip each weighted pattern in turn. Whenever stripping brings the
    # strings closer together, keep the stripped strings as the new
    # baseline and charge only a scaled share of the improvement.
    base_dist = _string_dist_basic(str1, str2)
    penalty = 0.0
    for pat, weight in SD_PATTERNS:
        stripped1 = re.sub(pat, '', str1)
        stripped2 = re.sub(pat, '', str2)
        if stripped1 == str1 and stripped2 == str2:
            # Pattern occurs in neither string.
            continue

        stripped_dist = _string_dist_basic(stripped1, stripped2)
        gain = max(0.0, base_dist - stripped_dist)
        if gain == 0.0:
            continue

        # Shift the baseline down so the same portion is not matched
        # again by a later pattern.
        str1, str2 = stripped1, stripped2
        base_dist = stripped_dist
        penalty += weight * gain

    return base_dist + penalty
def _plurality(objs):
"""Given a sequence of comparable objects, returns the object that
is most common in the set and if it is the only object is the set.
"""
# Calculate frequencies.
freqs = defaultdict(int)
for obj in objs:
freqs[obj] += 1
# Find object with maximum frequency.
max_freq = 0
res = None
for obj, freq in freqs.items():
if freq > max_freq:
max_freq = freq
res = obj
return res, len(freqs) <= 1
def current_metadata(items):
    """Guess the artist and album shared by a set of Items.

    Returns ``(artist, album, artist_consensus)``: the most common
    ``artist`` and ``album`` values among the items, plus a flag that is
    True when all items carry the same artist.
    """
    artist, artist_consensus = _plurality([item.artist for item in items])
    album, _ = _plurality([item.album for item in items])
    return artist, album, artist_consensus
def order_items(items, trackinfo):
    """Reorder ``items`` so that they line up with ``trackinfo``.

    Returns a new list in which position ``n`` holds the item matched to
    ``trackinfo[n]``, or None when the two sequences differ in length.
    """
    # Make sure lengths match.
    if len(items) != len(trackinfo):
        return None

    # Cost matrix: one row per item, one column per canonical track.
    costs = [
        [track_distance(cur_item, canon_item, i + 1)
         for i, canon_item in enumerate(trackinfo)]
        for cur_item in items
    ]

    # Minimum-cost bipartite matching, then place each item at the index
    # of the canonical track it was matched to.
    ordered_items = [None] * len(items)
    for cur_idx, canon_idx in Munkres().compute(costs):
        ordered_items[canon_idx] = items[cur_idx]
    return ordered_items
def track_distance(item, track_data, track_index=None, incl_artist=False):
"""Determines the significance of a track metadata change. Returns
a float in [0.0,1.0]. `track_index` is the track number of the
`track_data` metadata set. If `track_index` is provided and
item.track is set, then these indices are used as a component of
the distance calculation. `incl_artist` indicates that a distance
component should be included for the track artist (i.e., for
various-artist releases).
"""
# NOTE(review): indentation is not visible in this view, so the nesting
# of the statements below (in particular which branch each
# ``dist_max +=`` line belongs to) must be confirmed against the file.
# Distance and normalization accumulators.
dist, dist_max = 0.0, 0.0
# Check track length.
if 'length' not in track_data:
# If there's no length to check, assume the worst.
dist += TRACK_LENGTH_WEIGHT
else:
diff = abs(item.length - track_data['length'])
# Discrepancies of up to TRACK_LENGTH_GRACE are not penalized.
diff = max(diff - TRACK_LENGTH_GRACE, 0.0)
# Cap the discrepancy so the penalty tops out at TRACK_LENGTH_WEIGHT.
diff = min(diff, TRACK_LENGTH_MAX)
dist += (diff / TRACK_LENGTH_MAX) * TRACK_LENGTH_WEIGHT
dist_max += TRACK_LENGTH_WEIGHT
# Track title.
dist += string_dist(item.title, track_data['title']) * TRACK_TITLE_WEIGHT
dist_max += TRACK_TITLE_WEIGHT
# Track artist, if included.
# Attention: MB DB does not have artist info for all compilations,
# so only check artist distance if there is actually an artist in
# the MB track data.
if incl_artist and 'artist' in track_data:
dist += string_dist(item.artist, track_data['artist']) * \
TRACK_ARTIST_WEIGHT
dist_max += TRACK_ARTIST_WEIGHT
# Track index.
if track_index and item.track:
if track_index != item.track:
dist += TRACK_INDEX_WEIGHT
dist_max += TRACK_INDEX_WEIGHT
# MusicBrainz track ID.
if item.mb_trackid:
if item.mb_trackid != track_data['id']:
dist += TRACK_ID_WEIGHT
dist_max += TRACK_ID_WEIGHT
# Plugin distances.
# plugins.track_distance returns a (distance, maximum) pair that is
# folded into the same accumulators.
plugin_d, plugin_dm = plugins.track_distance(item, track_data)
dist += plugin_d
dist_max += plugin_dm
# Normalize the accumulated penalties by the maximum attainable total.
return dist / dist_max
def distance(items, info):
"""Determines how "significant" an album metadata change would be.
Returns a float in [0.0,1.0]. The list of items must be ordered.
"""
# NOTE(review): indentation is not visible in this view; confirm against
# the file which statements sit inside the ``if``/``for`` bodies below.
cur_artist, cur_album, _ = current_metadata(items)
# current_metadata may yield None; compare against empty strings instead.
cur_artist = cur_artist or ''
cur_album = cur_album or ''
# These accumulate the possible distance components. The final
# distance will be dist/dist_max.
dist = 0.0
dist_max = 0.0
# Artist/album metadata.
if not info['va']:
dist += string_dist(cur_artist, info['artist']) * ARTIST_WEIGHT
dist_max += ARTIST_WEIGHT
dist += string_dist(cur_album, info['album']) * ALBUM_WEIGHT
dist_max += ALBUM_WEIGHT
# Track distances.
# Items are paired positionally with info['tracks']; the 1-based position
# is passed as the track index and info['va'] as incl_artist.
for i, (item, track_data) in enumerate(zip(items, info['tracks'])):
dist += track_distance(item, track_data, i+1, info['va']) * \
TRACK_WEIGHT
dist_max += TRACK_WEIGHT
# Plugin distances.
plugin_d, plugin_dm = plugins.album_distance(items, info)
dist += plugin_d
dist_max += plugin_dm
# Normalize distance, avoiding divide-by-zero.
if dist_max == 0.0:
return 0.0
else:
return dist/dist_max
def apply_item_metadata(item, track_data):
"""Set an item's metadata from its matched info dictionary.
"""
item.artist = track_data['artist']
item.title = track_data['title']
item.mb_trackid = track_data['id']
if 'artist_id' in track_data:
item.mb_artistid = track_data['artist_id']
item.artist = track_info.artist
item.title = track_info.title
item.mb_trackid = track_info.track_id
if track_info.artist_id:
item.mb_artistid = track_info.artist_id
# At the moment, the other metadata is left intact (including album
# and track number). Perhaps these should be emptied?
def apply_metadata(items, info):
"""Set the items' metadata to match the data given in info. The
list of items must be ordered.
def apply_metadata(items, album_info):
"""Set the items' metadata to match an AlbumInfo object. The list
of items must be ordered.
"""
for index, (item, track_data) in enumerate(zip(items, info['tracks'])):
for index, (item, track_info) in enumerate(zip(items, album_info.tracks)):
# Album, artist, track count.
if 'artist' in track_data:
item.artist = track_data['artist']
if track_info.artist:
item.artist = track_info.artist
else:
item.artist = info['artist']
item.albumartist = info['artist']
item.album = info['album']
item.artist = album_info.artist
item.albumartist = album_info.artist
item.album = album_info.album
item.tracktotal = len(items)
# Release date.
if 'year' in info:
item.year = info['year']
if 'month' in info:
item.month = info['month']
if 'day' in info:
item.day = info['day']
if album_info.year:
item.year = album_info.year
if album_info.month:
item.month = album_info.month
if album_info.day:
item.day = album_info.day
# Title and track index.
item.title = track_data['title']
item.title = track_info.title
item.track = index + 1
# MusicBrainz IDs.
item.mb_trackid = track_data['id']
item.mb_albumid = info['album_id']
if 'artist_id' in track_data:
item.mb_artistid = track_data['artist_id']
item.mb_trackid = track_info.track_id
item.mb_albumid = album_info.album_id
if track_info.artist_id:
item.mb_artistid = track_info.artist_id
else:
item.mb_artistid = info['artist_id']
item.mb_albumartistid = info['artist_id']
item.albumtype = info['albumtype']
item.mb_artistid = album_info.artist_id
item.mb_albumartistid = album_info.artist_id
item.albumtype = album_info.albumtype
if album_info.label:
item.label = album_info.label
# Compilation flag.
item.comp = info['va']
item.comments = 'tagged by headphones/beets'
def match_by_id(items):
    """Look up the album that the items' MusicBrainz album IDs agree on.

    Items without an album ID are ignored. Returns the result of
    ``mb.album_for_id`` when every remaining item carries the same
    album ID. Returns None when no item has an album ID or when the IDs
    disagree.
    """
    # Is there a consensus on the MB album ID?
    albumids = [item.mb_albumid for item in items if item.mb_albumid]
    if not albumids:
        log.debug('No album IDs found.')
        return None

    # All IDs are equal exactly when they collapse to a single set
    # element. This avoids folding the list with reduce(), which is not
    # a builtin on Python 3.
    if len(set(albumids)) != 1:
        log.debug('No album ID consensus.')
        return None

    albumid = albumids[0]
    log.debug('Searching for discovered album ID: ' + albumid)
    return mb.album_for_id(albumid)
    #fixme In the future, at the expense of performance, we could use
    # other IDs (i.e., track and artist) in case the album tag isn't
    # present, but that event seems very unlikely.
def recommendation(results):
    """Turn a sorted list of result tuples into a recommendation flag.

    Each result's first element is its distance and the best candidate
    comes first. Returns RECOMMEND_STRONG, RECOMMEND_MEDIUM or
    RECOMMEND_NONE.
    """
    # No candidates: no recommendation.
    if not results:
        return RECOMMEND_NONE

    min_dist = results[0][0]

    # Strong recommendation level.
    if min_dist < STRONG_REC_THRESH:
        return RECOMMEND_STRONG

    # A lone candidate, or a best distance within the medium threshold.
    if len(results) == 1 or min_dist <= MEDIUM_REC_THRESH:
        return RECOMMEND_MEDIUM

    # Gap between the first two candidates is large.
    if results[1][0] - min_dist >= REC_GAP_THRESH:
        return RECOMMEND_MEDIUM

    # No conclusion.
    return RECOMMEND_NONE
def validate_candidate(items, tuple_dict, info):
    """Evaluate one candidate info dict and record it in ``tuple_dict``.

    The candidate is dropped (with a debug message) when its album ID is
    already present, when its track count differs from the number of
    items, or when the items cannot be ordered against its tracks.
    Otherwise ``tuple_dict[album_id]`` is set to
    ``(distance, ordered_items, info)``.
    """
    log.debug('Candidate: %s - %s' % (info['artist'], info['album']))

    # Don't duplicate.
    album_id = info['album_id']
    if album_id in tuple_dict:
        log.debug('Duplicate.')
        return

    # Make sure the album has the correct number of tracks.
    tracks = info['tracks']
    if len(items) != len(tracks):
        log.debug('Track count mismatch.')
        return

    # Put items in order.
    ordered = order_items(items, tracks)
    if not ordered:
        log.debug('Not orderable.')
        return

    # Get the change distance.
    dist = distance(ordered, info)
    log.debug('Success. Distance: %f' % dist)

    tuple_dict[album_id] = dist, ordered, info
def tag_album(items, timid=False, search_artist=None, search_album=None,
search_id=None):
"""Bundles together the functionality used to infer tags for a
set of items comprised by an album. Returns everything relevant:
- The current artist.
- The current album.
- A list of (distance, items, info) tuples where info is a
dictionary containing the inferred tags and items is a
reordered version of the input items list. The candidates are
sorted by distance (i.e., best match first).
- A recommendation, one of RECOMMEND_STRONG, RECOMMEND_MEDIUM,
or RECOMMEND_NONE; indicating that the first candidate is
very likely, it is somewhat likely, or no conclusion could
be reached.
If search_artist and search_album or search_id are provided, then
they are used as search terms in place of the current metadata.
May raise an AutotagError if existing metadata is insufficient.
"""
# NOTE(review): indentation is not visible in this view; the nesting of
# the ID-match section and its early returns (``rec`` is only assigned
# after a successful ID lookup) must be confirmed against the file.
# Get current metadata.
cur_artist, cur_album, artist_consensus = current_metadata(items)
log.debug('Tagging %s - %s' % (cur_artist, cur_album))
# The output result tuples (keyed by MB album ID).
out_tuples = {}
# Try to find album indicated by MusicBrainz IDs.
# An explicit search_id takes precedence over IDs found in the items' tags.
if search_id:
log.debug('Searching for album ID: ' + search_id)
id_info = mb.album_for_id(search_id)
else:
id_info = match_by_id(items)
if id_info:
validate_candidate(items, out_tuples, id_info)
rec = recommendation(out_tuples.values())
log.debug('Album ID match recommendation is ' + str(rec))
if out_tuples and not timid:
# If we have a very good MBID match, return immediately.
# Otherwise, this match will compete against metadata-based
# matches.
if rec == RECOMMEND_STRONG:
log.debug('ID match.')
return cur_artist, cur_album, out_tuples.values(), rec
# If searching by ID, don't continue to metadata search.
if search_id is not None:
if out_tuples:
return cur_artist, cur_album, out_tuples.values(), rec
else:
return cur_artist, cur_album, [], RECOMMEND_NONE
# Search terms.
if not (search_artist and search_album):
# No explicit search terms -- use current metadata.
search_artist, search_album = cur_artist, cur_album
log.debug(u'Search terms: %s - %s' % (search_artist, search_album))
# Get candidate metadata from search.
if search_artist and search_album:
candidates = mb.match_album(search_artist, search_album,
len(items), MAX_CANDIDATES)
candidates = list(candidates)
else:
candidates = []
# Possibly add "various artists" search.
# Triggered when the items disagree on the artist, the artist is one of
# the VA_ARTISTS placeholders, or any item has its ``comp`` flag set.
if search_album and ((not artist_consensus) or \
(search_artist.lower() in VA_ARTISTS) or \
any(item.comp for item in items)):
log.debug(u'Possibly Various Artists; adding matches.')
candidates.extend(mb.match_album(None, search_album, len(items),
MAX_CANDIDATES))
# Get candidates from plugins.
candidates.extend(plugins.candidates(items))
# Get the distance to each candidate.
log.debug(u'Evaluating %i candidates.' % len(candidates))
for info in candidates:
validate_candidate(items, out_tuples, info)
# Sort by distance.
# The tuples start with the distance, so a plain sort orders by it first.
out_tuples = out_tuples.values()
out_tuples.sort()
rec = recommendation(out_tuples)
return cur_artist, cur_album, out_tuples, rec
def tag_item(item, timid=False, search_artist=None, search_title=None,
search_id=None):
"""Attempts to find metadata for a single track. Returns a
`(candidates, recommendation)` pair where `candidates` is a list
of `(distance, track_info)` pairs. `search_artist` and
`search_title` may be used to override the current metadata for
the purposes of the MusicBrainz title; likewise `search_id`.
"""
# NOTE(review): indentation is not visible in this view; the nesting of
# the ID-match section (``rec`` is only assigned after a successful ID
# lookup) must be confirmed against the file.
candidates = []
# First, try matching by MusicBrainz ID.
# An explicit search_id takes precedence over the ID stored on the item.
trackid = search_id or item.mb_trackid
if trackid:
log.debug('Searching for track ID: ' + trackid)
track_info = mb.track_for_id(trackid)
if track_info:
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# If this is a good match, then don't keep searching.
rec = recommendation(candidates)
if rec == RECOMMEND_STRONG and not timid:
log.debug('Track ID match.')
return candidates, rec
# If we're searching by ID, don't proceed.
if search_id is not None:
if candidates:
return candidates, rec
else:
return [], RECOMMEND_NONE
# Search terms.
# Without both explicit terms, fall back to the item's own tags.
if not (search_artist and search_title):
search_artist, search_title = item.artist, item.title
log.debug(u'Item search terms: %s - %s' % (search_artist, search_title))
# Candidate metadata from search.
for track_info in mb.match_track(search_artist, search_title):
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# Add candidates from plugins.
for track_info in plugins.item_candidates(item):
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# Sort by distance and return with recommendation.
log.debug('Found %i candidates.' % len(candidates))
candidates.sort()
rec = recommendation(candidates)
return candidates, rec
item.comp = album_info.va
# Overwrite the comments tag; comment out the next line to leave it untouched.
item.comments = 'tagged by headphones/beets'
+109
View File
@@ -0,0 +1,109 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Facilities for automatically determining files' correct metadata.
"""
import os
import logging
from lib.beets import library, mediafile
from lib.beets.util import sorted_walk
# Parts of external interface.
from .hooks import AlbumInfo, TrackInfo
from .match import AutotagError
from .match import tag_item, tag_album
from .match import RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE
from .match import STRONG_REC_THRESH, MEDIUM_REC_THRESH, REC_GAP_THRESH
# Global logger.
log = logging.getLogger('beets')
# Additional utilities for the main interface.
def albums_in_dir(path):
    """Recursively search ``path`` for directories that contain media.

    Walks the tree with ``sorted_walk`` and tries to load every file of
    each directory as a library Item. Yields a ``(root, items)`` pair
    for every directory in which at least one file could be loaded,
    where ``root`` is the directory and ``items`` the list of Items
    loaded from it. Specifically, any folder containing any media files
    is treated as an album.

    Files raising ``mediafile.FileTypeError`` are skipped silently;
    files raising ``mediafile.UnreadableFileError`` are skipped with a
    warning.
    """
    for root, dirs, files in sorted_walk(path):
        # Get a list of items in the directory.
        items = []
        for filename in files:
            try:
                i = library.Item.from_path(os.path.join(root, filename))
            except mediafile.FileTypeError:
                pass
            except mediafile.UnreadableFileError:
                # Logger.warn() is a deprecated alias of warning().
                log.warning('unreadable file: ' + filename)
            else:
                items.append(i)

        # If it's nonempty, yield it.
        if items:
            yield root, items
def apply_item_metadata(item, track_info):
    """Copy the metadata of a matched TrackInfo object onto an item.

    The artist, title and MusicBrainz track ID are always overwritten.
    The MusicBrainz artist ID is only overwritten when the TrackInfo
    has a truthy ``artist_id``.
    """
    field_map = (
        ('artist', 'artist'),
        ('title', 'title'),
        ('mb_trackid', 'track_id'),
    )
    for item_field, info_field in field_map:
        setattr(item, item_field, getattr(track_info, info_field))

    artist_id = track_info.artist_id
    if artist_id:
        item.mb_artistid = artist_id
    # At the moment, the other metadata is left intact (including album
    # and track number). Perhaps these should be emptied?
def apply_metadata(items, album_info):
    """Write the metadata of an AlbumInfo object onto a list of items.

    ``items`` must be ordered: the n-th item is paired with the n-th
    entry of ``album_info.tracks`` and receives track number n
    (counting from 1).
    """
    total = len(items)
    for index, (item, track_info) in enumerate(zip(items, album_info.tracks)):
        # Album, artist, track count. A track-level artist wins over
        # the album artist when present.
        item.artist = track_info.artist or album_info.artist
        item.albumartist = album_info.artist
        item.album = album_info.album
        item.tracktotal = total

        # Release date: only the parts the release provides are written.
        for part in ('year', 'month', 'day'):
            value = getattr(album_info, part)
            if value:
                setattr(item, part, value)

        # Title and track index.
        item.title = track_info.title
        item.track = index + 1

        # MusicBrainz IDs, with the same track-over-album preference
        # for the artist ID.
        item.mb_trackid = track_info.track_id
        item.mb_albumid = album_info.album_id
        item.mb_artistid = track_info.artist_id or album_info.artist_id
        item.mb_albumartistid = album_info.artist_id
        item.albumtype = album_info.albumtype
        if album_info.label:
            item.label = album_info.label

        # Compilation flag.
        item.comp = album_info.va
+42 -6
View File
@@ -1,5 +1,5 @@
# This file is part of beets.
# Copyright 2010, Adrian Sampson.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
@@ -17,9 +17,13 @@
import urllib
import sys
import logging
import os
from lib.beets.autotag.mb import album_for_id
IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg']
COVER_NAMES = ['cover', 'front', 'art', 'album', 'folder']
# The common logger.
log = logging.getLogger('beets')
@@ -47,15 +51,47 @@ def art_for_asin(asin):
return fn
# Art from the filesystem.
def art_in_path(path):
    """Search the directory ``path`` for an album art file.

    Returns the joined path of the first image whose lowercased name
    starts with one of COVER_NAMES, otherwise of the first image found.
    Returns None when ``path`` is not a directory or holds no image.
    """
    if not os.path.isdir(path):
        return

    # Find all files that look like images in the directory.
    suffixes = tuple('.' + ext for ext in IMAGE_EXTENSIONS)
    images = [fn for fn in os.listdir(path)
              if fn.lower().endswith(suffixes)]

    # Look for "preferred" filenames.
    prefixes = tuple(COVER_NAMES)
    for fn in images:
        if fn.lower().startswith(prefixes):
            log.debug('Using well-named art file %s' % fn)
            return os.path.join(path, fn)

    # Fall back to any image in the folder.
    if images:
        fallback = images[0]
        log.debug('Using fallback art file %s' % fallback)
        return os.path.join(path, fallback)
# Main interface.
def art_for_album(album):
def art_for_album(album, path):
"""Given an album info dictionary from MusicBrainz, returns a path
to downloaded art for the album (or None if no art is found).
"""
if album['asin']:
log.debug('Fetching album art for ASIN %s.' % album['asin'])
return art_for_asin(album['asin'])
if isinstance(path, basestring):
out = art_in_path(path)
if out:
return out
if album.asin:
log.debug('Fetching album art for ASIN %s.' % album.asin)
return art_for_asin(album.asin)
else:
log.debug('No ASIN available: no art found.')
return None
@@ -69,7 +105,7 @@ if __name__ == '__main__':
if not album:
print 'album not found'
else:
fn = art_for_album(album)
fn = art_for_album(album, None)
if fn:
print fn
print len(open(fn).read())/1024
+113
View File
@@ -0,0 +1,113 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Finding album art for tagged albums."""
import urllib
import sys
import logging
import os
from beets.autotag.mb import album_for_id
# File extensions (without the dot) that art_in_path treats as images.
IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg']
# Filename prefixes that art_in_path prefers over other images.
COVER_NAMES = ['cover', 'front', 'art', 'album', 'folder']
# The common logger.
log = logging.getLogger('beets')
# Art from Amazon.
# URL template filled with (ASIN, index); art_for_asin tries each index
# in AMAZON_INDICES and accepts only responses of AMAZON_CONTENT_TYPE.
AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg'
AMAZON_INDICES = (1,2)
AMAZON_CONTENT_TYPE = 'image/jpeg'
def art_for_asin(asin):
    """Fetch art for an Amazon ID (ASIN) string.

    Tries each index in AMAZON_INDICES in turn. Returns the local
    filename of the first download whose content type equals
    AMAZON_CONTENT_TYPE, or None when every attempt fails.
    """
    for index in AMAZON_INDICES:
        # Fetch the image.
        url = AMAZON_URL % (asin, index)
        try:
            log.debug('Downloading art: %s' % url)
            fn, headers = urllib.urlretrieve(url)
        except IOError:
            log.debug('error fetching art at URL %s' % url)
            continue

        # Make sure it's actually an image.
        if headers.gettype() == AMAZON_CONTENT_TYPE:
            log.debug('Downloaded art to: %s' % fn)
            return fn

        # The response was not an image. Nobody will use the temporary
        # file urlretrieve created, so remove it instead of leaving it
        # behind. Cleanup is best-effort only.
        try:
            os.remove(fn)
        except OSError:
            pass

    return None
# Art from the filesystem.
def art_in_path(path):
    """Search the directory ``path`` for an album art file.

    Returns the joined path of the first image whose lowercased name
    starts with one of COVER_NAMES, otherwise of the first image found.
    Returns None when ``path`` is not a directory or holds no image.
    """
    if not os.path.isdir(path):
        return

    # Find all files that look like images in the directory.
    suffixes = tuple('.' + ext for ext in IMAGE_EXTENSIONS)
    images = [fn for fn in os.listdir(path)
              if fn.lower().endswith(suffixes)]

    # Look for "preferred" filenames.
    prefixes = tuple(COVER_NAMES)
    for fn in images:
        if fn.lower().startswith(prefixes):
            log.debug('Using well-named art file %s' % fn)
            return os.path.join(path, fn)

    # Fall back to any image in the folder.
    if images:
        fallback = images[0]
        log.debug('Using fallback art file %s' % fallback)
        return os.path.join(path, fallback)
# Main interface.
def art_for_album(album, path):
    """Return a path to cover art for an album, or None if none is found.

    ``album`` is an album info object whose ``asin`` attribute is read.
    ``path`` is a directory to search for existing art first; when it
    is not a string (e.g. None) the directory search is skipped. If no
    local art is found, art is fetched for the album's ASIN.
    """
    # Prefer an image that already exists in the given directory.
    if isinstance(path, basestring):
        local_art = art_in_path(path)
        if local_art:
            return local_art

    asin = album.asin
    if not asin:
        log.debug('No ASIN available: no art found.')
        return None

    log.debug('Fetching album art for ASIN %s.' % asin)
    return art_for_asin(asin)
# Smoke test.
if __name__ == '__main__':
    # Usage: pass a MusicBrainz album ID as the first argument. Prints
    # the path of the art found and its size in kilobytes.
    aid = sys.argv[1]
    album = album_for_id(aid)
    if not album:
        print('album not found')
    else:
        fn = art_for_album(album, None)
        if fn:
            print(fn)
            # Read the image as bytes and close the handle right away.
            with open(fn, 'rb') as art_file:
                print(len(art_file.read()) // 1024)
        else:
            print('no art found')
+125
View File
@@ -0,0 +1,125 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Glue between metadata sources and the matching logic."""
from lib.beets import plugins
from lib.beets.autotag import mb
# Classes used to represent candidate options.
class AlbumInfo(object):
    """A canonical release that a library release can be matched to.

    Required data members:

    - ``album``: the release title
    - ``album_id``: MusicBrainz ID; UUID fragment only
    - ``artist``: name of the release's primary artist
    - ``artist_id``
    - ``tracks``: list of TrackInfo objects making up the release

    Optional data members (may be None):

    - ``asin``: Amazon ASIN
    - ``albumtype``: string describing the kind of release
    - ``va``: boolean: whether the release has "various artists"
    - ``year``: release year
    - ``month``: release month
    - ``day``: release day
    - ``label``: music label responsible for the release
    """
    def __init__(self, album, album_id, artist, artist_id, tracks, asin=None,
                 albumtype=None, va=False, year=None, month=None, day=None,
                 label=None):
        # Required fields.
        self.album, self.album_id = album, album_id
        self.artist, self.artist_id = artist, artist_id
        self.tracks = tracks
        # Optional fields.
        self.asin = asin
        self.albumtype = albumtype
        self.va = va
        self.year, self.month, self.day = year, month, day
        self.label = label
class TrackInfo(object):
    """A canonical track on a release, as found in ``AlbumInfo.tracks``.

    Required data members:

    - ``title``: name of the track
    - ``track_id``: MusicBrainz ID; UUID fragment only

    Optional data members (may be None):

    - ``artist``: individual track artist name
    - ``artist_id``
    - ``length``: float: duration of the track in seconds
    """
    def __init__(self, title, track_id, artist=None, artist_id=None,
                 length=None):
        self.title, self.track_id = title, track_id
        self.artist, self.artist_id = artist, artist_id
        self.length = length
# Aggregation of sources.
def _album_for_id(album_id):
    """Return what ``mb.album_for_id`` finds for a MusicBrainz release ID."""
    album_info = mb.album_for_id(album_id)
    return album_info
def _track_for_id(track_id):
    """Return what ``mb.track_for_id`` finds for a recording MBID."""
    track_info = mb.track_for_id(track_id)
    return track_info
def _album_candidates(items, artist, album, va_likely):
    """Collect candidate album matches from MusicBrainz and plugins.

    ``items`` is the list of Item objects making up the album.
    ``artist`` and ``album`` are the names (strings) to search for;
    they may come from the items or from the user. ``va_likely`` is a
    boolean saying whether the album is probably a "various artists"
    release. Returns a list of all candidates found.
    """
    found = []

    # MusicBrainz searches need an album name.
    if album:
        if artist:
            # Ordinary artist + album search.
            found.extend(mb.match_album(artist, album, len(items)))
        if va_likely:
            # Artist-less search for various-artists releases.
            found.extend(mb.match_album(None, album, len(items)))

    # Plugins contribute candidates regardless of the search terms.
    found.extend(plugins.candidates(items))
    return found
def _item_candidates(item, artist, title):
    """Collect candidate track matches from MusicBrainz and plugins.

    ``item`` is the Item being matched. ``artist`` and ``title`` are
    the search strings, taken from the item or supplied by the user.
    Returns a list with the MusicBrainz candidates first, followed by
    the plugin candidates.
    """
    found = list(mb.match_track(artist, title))
    found.extend(plugins.item_candidates(item))
    return found
+125
View File
@@ -0,0 +1,125 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Glue between metadata sources and the matching logic."""
from beets import plugins
from beets.autotag import mb
# Classes used to represent candidate options.
class AlbumInfo(object):
    """Canonical description of a release that a group of library
    items may be matched against.

    Required data members:
    - ``album``: title of the release
    - ``album_id``: MusicBrainz ID (UUID fragment only)
    - ``artist``: name of the release's primary artist
    - ``artist_id``: ID of that artist (presumably a MusicBrainz ID)
    - ``tracks``: list of TrackInfo objects making up the release

    Optional data members (``va`` defaults to False, the others to
    None):
    - ``asin``: Amazon ASIN
    - ``albumtype``: string describing the kind of release
    - ``va``: boolean, whether the release has "various artists"
    - ``year``, ``month``, ``day``: components of the release date
    - ``label``: music label responsible for the release
    """
    def __init__(self, album, album_id, artist, artist_id, tracks, asin=None,
                 albumtype=None, va=False, year=None, month=None, day=None,
                 label=None):
        # Required identification of the release and its artist.
        self.album, self.album_id = album, album_id
        self.artist, self.artist_id = artist, artist_id
        self.tracks = tracks
        # Optional descriptive fields.
        self.asin, self.albumtype, self.va = asin, albumtype, va
        # Optional release date and label.
        self.year, self.month, self.day = year, month, day
        self.label = label
class TrackInfo(object):
    """Canonical description of a single track on a release; instances
    populate the ``tracks`` list of an AlbumInfo.

    Data members:
    - ``title``: name of the track (required)
    - ``track_id``: MusicBrainz ID, UUID fragment only (required)
    - ``artist``: individual track artist name (optional)
    - ``artist_id``: ID of the track artist (optional)
    - ``length``: float, duration of the track in seconds (optional)

    Optional members default to None.
    """
    def __init__(self, title, track_id, artist=None, artist_id=None,
                 length=None):
        # Required identification of the track.
        self.title, self.track_id = title, track_id
        # Optional per-track artist and duration.
        self.artist, self.artist_id = artist, artist_id
        self.length = length
# Aggregation of sources.
def _album_for_id(album_id):
    """Look up the release for a MusicBrainz release ID.

    Delegates to ``mb.album_for_id`` and passes its result through
    unchanged.
    """
    album_info = mb.album_for_id(album_id)
    return album_info
def _track_for_id(track_id):
    """Look up the track for a MusicBrainz recording ID.

    Delegates to ``mb.track_for_id`` and passes its result through
    unchanged.
    """
    track_info = mb.track_for_id(track_id)
    return track_info
def _album_candidates(items, artist, album, va_likely):
    """Collect candidate album matches from MusicBrainz and plugins.

    ``items`` is the list of Item objects making up the album.
    ``artist`` and ``album`` are the names (strings) to search for;
    they may come from the items or from the user. ``va_likely`` is a
    boolean saying whether the album is probably a "various artists"
    release. Returns a list of all candidates found.
    """
    found = []

    # MusicBrainz searches need an album name.
    if album:
        if artist:
            # Ordinary artist + album search.
            found.extend(mb.match_album(artist, album, len(items)))
        if va_likely:
            # Artist-less search for various-artists releases.
            found.extend(mb.match_album(None, album, len(items)))

    # Plugins contribute candidates regardless of the search terms.
    found.extend(plugins.candidates(items))
    return found
def _item_candidates(item, artist, title):
    """Collect candidate track matches from MusicBrainz and plugins.

    ``item`` is the Item being matched. ``artist`` and ``title`` are
    the search strings, taken from the item or supplied by the user.
    Returns a list with the MusicBrainz candidates first, followed by
    the plugin candidates.
    """
    found = list(mb.match_track(artist, title))
    found.extend(plugins.item_candidates(item))
    return found
+490
View File
@@ -0,0 +1,490 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Matches existing metadata with canonical information to identify
releases and tracks.
"""
import logging
import re
from lib.munkres import Munkres
#from unidecode import unidecode
from lib.beets import plugins
from lib.beets.util import levenshtein, plurality
from lib.beets.autotag import hooks
# Distance parameters.
# Each *_WEIGHT below is both the multiplier applied to a normalized
# component distance and that component's contribution to the maximum
# possible distance, so a larger weight means a more influential
# component.

# Text distance weights: proportions on the normalized intuitive edit
# distance of the album-level artist and album names.
ARTIST_WEIGHT = 3.0
ALBUM_WEIGHT = 3.0
# The weight of the entire distance calculated for a given track.
TRACK_WEIGHT = 1.0
# These distances are components of the track distance (that is, they
# compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT;
# the overall TRACK_WEIGHT does that).
TRACK_TITLE_WEIGHT = 3.0
# Used instead of a global artist penalty for various-artist matches.
TRACK_ARTIST_WEIGHT = 2.0
# Added when the indices of tracks don't match.
TRACK_INDEX_WEIGHT = 1.0
# Track length weights: no penalty before GRACE, maximum (WEIGHT)
# penalty at GRACE+MAX discrepancy. GRACE and MAX are in the same unit
# as the track lengths being compared.
TRACK_LENGTH_GRACE = 10
TRACK_LENGTH_MAX = 30
TRACK_LENGTH_WEIGHT = 2.0
# MusicBrainz track ID matches: penalty when an item already carries a
# track ID that differs from the candidate's.
TRACK_ID_WEIGHT = 5.0

# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma
# (e.g., "something, the").
SD_END_WORDS = ['the', 'a', 'an']
# Reduced weights for certain portions of the string. Each entry is a
# (regular expression, weight) pair; differences confined to text
# matched by the expression are scaled by the weight.
SD_PATTERNS = [
    (r'^the ', 0.1),
    (r'[\[\(]?(ep|single)[\]\)]?', 0.0),
    (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
    (r'\(.*?\)', 0.3),
    (r'\[.*?\]', 0.3),
    (r'(, )?(pt\.|part) .+', 0.2),
]
# Replacements to use before testing distance, as
# (regular expression, replacement) pairs.
SD_REPLACE = [
    (r'&', 'and'),
]

# Recommendation constants.
RECOMMEND_STRONG = 'RECOMMEND_STRONG'
RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM'
RECOMMEND_NONE = 'RECOMMEND_NONE'
# Thresholds for recommendations: a best distance below
# STRONG_REC_THRESH is a strong match; one at or below
# MEDIUM_REC_THRESH, or one leading the runner-up by at least
# REC_GAP_THRESH, is a medium match.
STRONG_REC_THRESH = 0.04
MEDIUM_REC_THRESH = 0.25
REC_GAP_THRESH = 0.25

# Artist signals that indicate "various artists" (compared against the
# lowercased search artist).
VA_ARTISTS = (u'', u'various artists', u'va', u'unknown')
# Autotagging exceptions.
class AutotagError(Exception):
    """Exception type reserved for failures of the autotagging
    process.
    """
# Global logger: every function in this module reports its debugging
# output through the shared 'beets' logger.
log = logging.getLogger('beets')
# Primary matching functionality.
def _string_dist_basic(str1, str2):
"""Basic edit distance between two strings, ignoring
non-alphanumeric characters and case. Comparisons are based on a
transliteration/lowering to ASCII characters. Normalized by string
length.
"""
#str1 = unidecode(str1)
#str2 = unidecode(str2)
str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
if not str1 and not str2:
return 0.0
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
def string_dist(str1, str2):
    """Compute an "intuitive" edit distance between two strings: a
    length-normalized edit distance adjusted by several heuristics
    about how titles and names are commonly written.
    """
    left, right = str1.lower(), str2.lower()

    # An article moved to the end after a comma should not count as a
    # difference: "something, the" is treated like "the something".
    for word in SD_END_WORDS:
        suffix = ', %s' % word
        if left.endswith(suffix):
            left = '%s %s' % (word, left[:-len(suffix)])
        if right.endswith(suffix):
            right = '%s %s' % (word, right[:-len(suffix)])

    # Apply the basic normalizing substitutions to both sides.
    for pat, repl in SD_REPLACE:
        left = re.sub(pat, repl, left)
        right = re.sub(pat, repl, right)

    # Walk through the reduced-weight patterns. Whenever removing a
    # pattern brings the strings closer together, keep the stripped
    # strings as the new baseline and charge only a weighted share of
    # the distance that the removal saved.
    current = _string_dist_basic(left, right)
    penalty = 0.0
    for pat, weight in SD_PATTERNS:
        stripped_left = re.sub(pat, '', left)
        stripped_right = re.sub(pat, '', right)
        if stripped_left == left and stripped_right == right:
            # Pattern occurs in neither string.
            continue

        stripped_dist = _string_dist_basic(stripped_left, stripped_right)
        saved = max(0.0, current - stripped_dist)
        if saved == 0.0:
            # Removing the pattern did not help; leave baseline as is.
            continue

        # Move the baseline down so the same text is not matched
        # again, and account for the discounted difference.
        left, right = stripped_left, stripped_right
        current = stripped_dist
        penalty += weight * saved

    return current + penalty
def current_metadata(items):
    """Determine the most likely artist and album for a set of Items.

    Each value is the one carried by the plurality of the items.
    Returns ``(artist, album, artist_consensus)`` where
    ``artist_consensus`` is True only when every item has the winning
    artist.
    """
    artists = [item.artist for item in items]
    likely_artist, artist_freq = plurality(artists)

    albums = [item.album for item in items]
    likely_album, _ = plurality(albums)

    return likely_artist, likely_album, artist_freq == len(artists)
def order_items(items, trackinfo):
    """Reorder ``items`` so that each one lines up with its best
    counterpart in ``trackinfo`` (the canonical track list).

    Returns None when the two lists differ in length; otherwise
    returns a new list in which position ``i`` holds the item assigned
    to ``trackinfo[i]``.
    """
    if len(items) != len(trackinfo):
        return None

    # Cost matrix: one row per item, one column per canonical track
    # (track numbers are 1-based).
    costs = [
        [track_distance(item, canon, pos + 1)
         for pos, canon in enumerate(trackinfo)]
        for item in items
    ]

    # Minimum-cost bipartite matching between items and tracks.
    ordered_items = [None] * len(items)
    for item_idx, track_idx in Munkres().compute(costs):
        ordered_items[track_idx] = items[item_idx]
    return ordered_items
def track_distance(item, track_info, track_index=None, incl_artist=False):
    """Determine the significance of a track metadata change.

    Returns the accumulated weighted distance divided by the maximum
    achievable distance; with the built-in components this lies in
    [0.0, 1.0] (plugin contributions are assumed to respect that).

    `track_index` is the track number of the `track_info` metadata
    set. If `track_index` is provided and item.track is set, then
    these indices are used as a component of the distance calculation.
    `incl_artist` indicates that a distance component should be
    included for the track artist (i.e., for various-artist releases).
    """
    # Distance and normalization accumulators.
    dist, dist_max = 0.0, 0.0

    # Check track length.
    if not track_info.length:
        # If there's no length to check, assume the worst.
        dist += TRACK_LENGTH_WEIGHT
    else:
        diff = abs(item.length - track_info.length)
        diff = max(diff - TRACK_LENGTH_GRACE, 0.0)
        diff = min(diff, TRACK_LENGTH_MAX)
        # Force true division: if both lengths are integers, plain `/`
        # would truncate under Python 2 and drop the penalty to zero.
        dist += (diff / float(TRACK_LENGTH_MAX)) * TRACK_LENGTH_WEIGHT
    dist_max += TRACK_LENGTH_WEIGHT

    # Track title.
    dist += string_dist(item.title, track_info.title) * TRACK_TITLE_WEIGHT
    dist_max += TRACK_TITLE_WEIGHT

    # Track artist, if included.
    # Attention: MB DB does not have artist info for all compilations,
    # so only check artist distance if there is actually an artist in
    # the MB track data.
    if incl_artist and track_info.artist:
        dist += string_dist(item.artist, track_info.artist) * \
                TRACK_ARTIST_WEIGHT
        dist_max += TRACK_ARTIST_WEIGHT

    # Track index: only comparable when both sides have one.
    if track_index and item.track:
        if track_index != item.track:
            dist += TRACK_INDEX_WEIGHT
        dist_max += TRACK_INDEX_WEIGHT

    # MusicBrainz track ID: only counted when the item already has one.
    if item.mb_trackid:
        if item.mb_trackid != track_info.track_id:
            dist += TRACK_ID_WEIGHT
        dist_max += TRACK_ID_WEIGHT

    # Plugin distances.
    plugin_d, plugin_dm = plugins.track_distance(item, track_info)
    dist += plugin_d
    dist_max += plugin_dm

    # dist_max always includes the length and title weights, so it is
    # nonzero as long as those weights are.
    return dist / dist_max
def distance(items, album_info):
    """Measure how "significant" an album metadata change would be.

    ``items`` must already be ordered to line up with
    ``album_info.tracks``. The result is the accumulated weighted
    distance divided by its maximum possible value (0.0 when there is
    nothing to compare).
    """
    cur_artist, cur_album, _ = current_metadata(items)

    # Running total and the largest value the total could reach.
    total = 0.0
    total_max = 0.0

    # Album-level artist is skipped for various-artists releases; a
    # missing current value is compared as the empty string.
    if not album_info.va:
        total += string_dist(cur_artist or '', album_info.artist) * \
                 ARTIST_WEIGHT
        total_max += ARTIST_WEIGHT
    total += string_dist(cur_album or '', album_info.album) * ALBUM_WEIGHT
    total_max += ALBUM_WEIGHT

    # Per-track distances; track numbers are 1-based and the track
    # artist is only compared for various-artists releases.
    for pos, pair in enumerate(zip(items, album_info.tracks)):
        item, track_info = pair
        total += track_distance(item, track_info, pos + 1, album_info.va) * \
                 TRACK_WEIGHT
        total_max += TRACK_WEIGHT

    # Contributions from plugins.
    plugin_dist, plugin_max = plugins.album_distance(items, album_info)
    total += plugin_dist
    total_max += plugin_max

    # Normalize, guarding against division by zero.
    if total_max == 0.0:
        return 0.0
    return total / total_max
def match_by_id(items):
    """Look up the album named by the items' MusicBrainz album IDs.

    If at least one item carries an album ID and all the IDs present
    agree, returns the result of ``hooks._album_for_id`` for that ID.
    Returns None when no item has an album ID or when the IDs
    disagree.
    """
    # Only items that actually carry an album ID take part.
    albumids = [item.mb_albumid for item in items if item.mb_albumid]
    if not albumids:
        log.debug('No album IDs found.')
        return None

    # Consensus means exactly one distinct ID among the tagged items.
    if len(set(albumids)) != 1:
        log.debug('No album ID consensus.')
        return None

    albumid = albumids[0]
    log.debug('Searching for discovered album ID: ' + albumid)
    return hooks._album_for_id(albumid)

    # TODO: In the future, at the expense of performance, we could use
    # other IDs (i.e., track and artist) in case the album tag isn't
    # present, but that event seems very unlikely.
def recommendation(results):
    """Derive a recommendation flag (RECOMMEND_STRONG,
    RECOMMEND_MEDIUM or RECOMMEND_NONE) from a list of result tuples
    sorted by distance, where each tuple starts with its distance.
    """
    # No candidates: no recommendation.
    if not results:
        return RECOMMEND_NONE

    best = results[0][0]

    # A very small distance is a strong recommendation.
    if best < STRONG_REC_THRESH:
        return RECOMMEND_STRONG

    # A lone candidate, or a reasonably small distance, is a medium
    # recommendation.
    if len(results) == 1 or best <= MEDIUM_REC_THRESH:
        return RECOMMEND_MEDIUM

    # So is a best candidate that clearly leads the runner-up.
    if results[1][0] - best >= REC_GAP_THRESH:
        return RECOMMEND_MEDIUM

    # No conclusion.
    return RECOMMEND_NONE
def validate_candidate(items, tuple_dict, info):
    """Try to add the candidate ``info`` to ``tuple_dict``, the
    dictionary of result tuples keyed by album ID.

    The candidate is dropped if its album ID is already present, if
    its track count differs from the number of items, or if the items
    cannot be ordered against its tracks. Otherwise a
    ``(distance, ordered_items, info)`` tuple is stored in
    ``tuple_dict``. Nothing is returned.
    """
    log.debug('Candidate: %s - %s' % (info.artist, info.album))

    if info.album_id in tuple_dict:
        # Already evaluated this release.
        log.debug('Duplicate.')
    elif len(items) != len(info.tracks):
        # The release must have exactly as many tracks as we have items.
        log.debug('Track count mismatch.')
    else:
        ordered = order_items(items, info.tracks)
        if ordered:
            # Record how far the candidate is from the current tags.
            dist = distance(ordered, info)
            log.debug('Success. Distance: %f' % dist)
            tuple_dict[info.album_id] = dist, ordered, info
        else:
            log.debug('Not orderable.')
def tag_album(items, timid=False, search_artist=None, search_album=None,
              search_id=None):
    """Infer tags for a set of items that together make up an album.

    Returns a 4-tuple:
        - The current artist.
        - The current album.
        - A list of (distance, items, info) tuples, where info
          describes the candidate release and items is a reordered
          version of the input items list. After a metadata search the
          list is sorted by distance (best match first).
        - A recommendation, one of RECOMMEND_STRONG, RECOMMEND_MEDIUM,
          or RECOMMEND_NONE; indicating that the first candidate is
          very likely, somewhat likely, or that no conclusion could be
          reached.

    If ``search_id`` is given, only that ID is looked up. If both
    ``search_artist`` and ``search_album`` are given, they replace the
    current metadata as search terms. With ``timid`` set, a strong ID
    match does not short-circuit the metadata search.

    NOTE(review): earlier documentation says an AutotagError may be
    raised when existing metadata is insufficient; no such raise is
    visible in this function itself.
    """
    cur_artist, cur_album, artist_consensus = current_metadata(items)
    log.debug('Tagging %s - %s' % (cur_artist, cur_album))

    # Validated candidates, keyed by album ID.
    out_tuples = {}

    # ID-based lookup first: an explicit ID takes precedence over the
    # IDs stored on the items.
    if search_id:
        log.debug('Searching for album ID: ' + search_id)
        id_info = hooks._album_for_id(search_id)
    else:
        id_info = match_by_id(items)

    if id_info:
        validate_candidate(items, out_tuples, id_info)
        rec = recommendation(out_tuples.values())
        log.debug('Album ID match recommendation is ' + str(rec))
        # A very good ID match ends the search right away (unless
        # timid); otherwise it competes with metadata-based matches.
        if out_tuples and not timid and rec == RECOMMEND_STRONG:
            log.debug('ID match.')
            return cur_artist, cur_album, out_tuples.values(), rec

    # An explicit ID search never falls through to a metadata search.
    if search_id is not None:
        if not out_tuples:
            return cur_artist, cur_album, [], RECOMMEND_NONE
        return cur_artist, cur_album, out_tuples.values(), rec

    # Without a complete pair of explicit search terms, search with
    # the current metadata.
    if not (search_artist and search_album):
        search_artist, search_album = cur_artist, cur_album
    log.debug(u'Search terms: %s - %s' % (search_artist, search_album))

    # Is this album likely to be a "various artists" release?
    va_likely = (not artist_consensus
                 or search_artist.lower() in VA_ARTISTS
                 or any(item.comp for item in items))
    log.debug(u'Album might be VA: %s' % str(va_likely))

    # Gather candidates from the data sources and evaluate each one.
    candidates = hooks._album_candidates(items, search_artist, search_album,
                                         va_likely)
    log.debug(u'Evaluating %i candidates.' % len(candidates))
    for candidate in candidates:
        validate_candidate(items, out_tuples, candidate)

    # Best (smallest distance) first.
    ranked = sorted(out_tuples.values())
    return cur_artist, cur_album, ranked, recommendation(ranked)
def tag_item(item, timid=False, search_artist=None, search_title=None,
             search_id=None):
    """Try to find metadata for a single track.

    Returns a `(candidates, recommendation)` pair where `candidates`
    is a list of `(distance, track_info)` pairs. `search_artist` and
    `search_title`, when both given, replace the item's own artist and
    title as search terms; `search_id` replaces the item's track ID
    and restricts the search to that ID. With `timid` set, a strong ID
    match does not short-circuit the metadata search.
    """
    candidates = []

    # Try a lookup by track ID first; an explicit ID wins over the one
    # stored on the item.
    trackid = search_id or item.mb_trackid
    if trackid:
        log.debug('Searching for track ID: ' + trackid)
        id_info = hooks._track_for_id(trackid)
        if id_info:
            id_dist = track_distance(item, id_info, incl_artist=True)
            candidates.append((id_dist, id_info))
            # A good enough ID match ends the search (unless timid).
            rec = recommendation(candidates)
            if rec == RECOMMEND_STRONG and not timid:
                log.debug('Track ID match.')
                return candidates, rec

    # An explicit ID search never falls through to a metadata search.
    if search_id is not None:
        if not candidates:
            return [], RECOMMEND_NONE
        return candidates, rec

    # Without a complete pair of explicit search terms, search with
    # the item's own metadata.
    if not (search_artist and search_title):
        search_artist, search_title = item.artist, item.title
    log.debug(u'Item search terms: %s - %s' % (search_artist, search_title))

    # Score every candidate returned by the data sources.
    candidates.extend(
        (track_distance(item, info, incl_artist=True), info)
        for info in hooks._item_candidates(item, search_artist, search_title)
    )

    # Best (smallest distance) first.
    log.debug('Found %i candidates.' % len(candidates))
    candidates.sort()
    return candidates, recommendation(candidates)
+490
View File
@@ -0,0 +1,490 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Matches existing metadata with canonical information to identify
releases and tracks.
"""
import logging
import re
from munkres import Munkres
from unidecode import unidecode
from beets import plugins
from beets.util import levenshtein, plurality
from beets.autotag import hooks
# Distance parameters.
# Each *_WEIGHT below is both the multiplier applied to a normalized
# component distance and that component's contribution to the maximum
# possible distance, so a larger weight means a more influential
# component.

# Text distance weights: proportions on the normalized intuitive edit
# distance of the album-level artist and album names.
ARTIST_WEIGHT = 3.0
ALBUM_WEIGHT = 3.0
# The weight of the entire distance calculated for a given track.
TRACK_WEIGHT = 1.0
# These distances are components of the track distance (that is, they
# compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT;
# the overall TRACK_WEIGHT does that).
TRACK_TITLE_WEIGHT = 3.0
# Used instead of a global artist penalty for various-artist matches.
TRACK_ARTIST_WEIGHT = 2.0
# Added when the indices of tracks don't match.
TRACK_INDEX_WEIGHT = 1.0
# Track length weights: no penalty before GRACE, maximum (WEIGHT)
# penalty at GRACE+MAX discrepancy. GRACE and MAX are in the same unit
# as the track lengths being compared.
TRACK_LENGTH_GRACE = 10
TRACK_LENGTH_MAX = 30
TRACK_LENGTH_WEIGHT = 2.0
# MusicBrainz track ID matches: penalty when an item already carries a
# track ID that differs from the candidate's.
TRACK_ID_WEIGHT = 5.0

# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma
# (e.g., "something, the").
SD_END_WORDS = ['the', 'a', 'an']
# Reduced weights for certain portions of the string. Each entry is a
# (regular expression, weight) pair; differences confined to text
# matched by the expression are scaled by the weight.
SD_PATTERNS = [
    (r'^the ', 0.1),
    (r'[\[\(]?(ep|single)[\]\)]?', 0.0),
    (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
    (r'\(.*?\)', 0.3),
    (r'\[.*?\]', 0.3),
    (r'(, )?(pt\.|part) .+', 0.2),
]
# Replacements to use before testing distance, as
# (regular expression, replacement) pairs.
SD_REPLACE = [
    (r'&', 'and'),
]

# Recommendation constants.
RECOMMEND_STRONG = 'RECOMMEND_STRONG'
RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM'
RECOMMEND_NONE = 'RECOMMEND_NONE'
# Thresholds for recommendations: a best distance below
# STRONG_REC_THRESH is a strong match; one at or below
# MEDIUM_REC_THRESH, or one leading the runner-up by at least
# REC_GAP_THRESH, is a medium match.
STRONG_REC_THRESH = 0.04
MEDIUM_REC_THRESH = 0.25
REC_GAP_THRESH = 0.25

# Artist signals that indicate "various artists" (compared against the
# lowercased search artist).
VA_ARTISTS = (u'', u'various artists', u'va', u'unknown')
# Autotagging exceptions.
class AutotagError(Exception):
    """Exception type reserved for failures of the autotagging
    process.
    """
# Global logger: every function in this module reports its debugging
# output through the shared 'beets' logger.
log = logging.getLogger('beets')
# Primary matching functionality.
def _string_dist_basic(str1, str2):
    """Return the Levenshtein distance between two strings, normalized
    by the length of the longer one.

    Before comparison each string is transliterated with ``unidecode``,
    lowercased, and stripped of every character outside ``a-z0-9``. If
    nothing is left of either string, the distance is 0.0.
    """
    first, second = [re.sub(r'[^a-z0-9]', '', unidecode(text).lower())
                     for text in (str1, str2)]
    longest = max(len(first), len(second))
    if longest == 0:
        # Both strings are empty after cleaning: identical.
        return 0.0
    return levenshtein(first, second) / float(longest)
def string_dist(str1, str2):
    """Compute an "intuitive" edit distance between two strings: a
    length-normalized edit distance adjusted by several heuristics
    about how titles and names are commonly written.
    """
    left, right = str1.lower(), str2.lower()

    # An article moved to the end after a comma should not count as a
    # difference: "something, the" is treated like "the something".
    for word in SD_END_WORDS:
        suffix = ', %s' % word
        if left.endswith(suffix):
            left = '%s %s' % (word, left[:-len(suffix)])
        if right.endswith(suffix):
            right = '%s %s' % (word, right[:-len(suffix)])

    # Apply the basic normalizing substitutions to both sides.
    for pat, repl in SD_REPLACE:
        left = re.sub(pat, repl, left)
        right = re.sub(pat, repl, right)

    # Walk through the reduced-weight patterns. Whenever removing a
    # pattern brings the strings closer together, keep the stripped
    # strings as the new baseline and charge only a weighted share of
    # the distance that the removal saved.
    current = _string_dist_basic(left, right)
    penalty = 0.0
    for pat, weight in SD_PATTERNS:
        stripped_left = re.sub(pat, '', left)
        stripped_right = re.sub(pat, '', right)
        if stripped_left == left and stripped_right == right:
            # Pattern occurs in neither string.
            continue

        stripped_dist = _string_dist_basic(stripped_left, stripped_right)
        saved = max(0.0, current - stripped_dist)
        if saved == 0.0:
            # Removing the pattern did not help; leave baseline as is.
            continue

        # Move the baseline down so the same text is not matched
        # again, and account for the discounted difference.
        left, right = stripped_left, stripped_right
        current = stripped_dist
        penalty += weight * saved

    return current + penalty
def current_metadata(items):
    """Determine the most likely artist and album for a set of Items.

    Each value is the one carried by the plurality of the items.
    Returns ``(artist, album, artist_consensus)`` where
    ``artist_consensus`` is True only when every item has the winning
    artist.
    """
    artists = [item.artist for item in items]
    likely_artist, artist_freq = plurality(artists)

    albums = [item.album for item in items]
    likely_album, _ = plurality(albums)

    return likely_artist, likely_album, artist_freq == len(artists)
def order_items(items, trackinfo):
    """Reorder ``items`` so that each one lines up with its best
    counterpart in ``trackinfo`` (the canonical track list).

    Returns None when the two lists differ in length; otherwise
    returns a new list in which position ``i`` holds the item assigned
    to ``trackinfo[i]``.
    """
    if len(items) != len(trackinfo):
        return None

    # Cost matrix: one row per item, one column per canonical track
    # (track numbers are 1-based).
    costs = [
        [track_distance(item, canon, pos + 1)
         for pos, canon in enumerate(trackinfo)]
        for item in items
    ]

    # Minimum-cost bipartite matching between items and tracks.
    ordered_items = [None] * len(items)
    for item_idx, track_idx in Munkres().compute(costs):
        ordered_items[track_idx] = items[item_idx]
    return ordered_items
def track_distance(item, track_info, track_index=None, incl_artist=False):
    """Determine the significance of a track metadata change.

    Returns the accumulated weighted distance divided by the maximum
    achievable distance; with the built-in components this lies in
    [0.0, 1.0] (plugin contributions are assumed to respect that).

    `track_index` is the track number of the `track_info` metadata
    set. If `track_index` is provided and item.track is set, then
    these indices are used as a component of the distance calculation.
    `incl_artist` indicates that a distance component should be
    included for the track artist (i.e., for various-artist releases).
    """
    # Distance and normalization accumulators.
    dist, dist_max = 0.0, 0.0

    # Check track length.
    if not track_info.length:
        # If there's no length to check, assume the worst.
        dist += TRACK_LENGTH_WEIGHT
    else:
        diff = abs(item.length - track_info.length)
        diff = max(diff - TRACK_LENGTH_GRACE, 0.0)
        diff = min(diff, TRACK_LENGTH_MAX)
        # Force true division: if both lengths are integers, plain `/`
        # would truncate under Python 2 and drop the penalty to zero.
        dist += (diff / float(TRACK_LENGTH_MAX)) * TRACK_LENGTH_WEIGHT
    dist_max += TRACK_LENGTH_WEIGHT

    # Track title.
    dist += string_dist(item.title, track_info.title) * TRACK_TITLE_WEIGHT
    dist_max += TRACK_TITLE_WEIGHT

    # Track artist, if included.
    # Attention: MB DB does not have artist info for all compilations,
    # so only check artist distance if there is actually an artist in
    # the MB track data.
    if incl_artist and track_info.artist:
        dist += string_dist(item.artist, track_info.artist) * \
                TRACK_ARTIST_WEIGHT
        dist_max += TRACK_ARTIST_WEIGHT

    # Track index: only comparable when both sides have one.
    if track_index and item.track:
        if track_index != item.track:
            dist += TRACK_INDEX_WEIGHT
        dist_max += TRACK_INDEX_WEIGHT

    # MusicBrainz track ID: only counted when the item already has one.
    if item.mb_trackid:
        if item.mb_trackid != track_info.track_id:
            dist += TRACK_ID_WEIGHT
        dist_max += TRACK_ID_WEIGHT

    # Plugin distances.
    plugin_d, plugin_dm = plugins.track_distance(item, track_info)
    dist += plugin_d
    dist_max += plugin_dm

    # dist_max always includes the length and title weights, so it is
    # nonzero as long as those weights are.
    return dist / dist_max
def distance(items, album_info):
    """Measure how "significant" an album metadata change would be.

    ``items`` must already be ordered to line up with
    ``album_info.tracks``. The result is the accumulated weighted
    distance divided by its maximum possible value (0.0 when there is
    nothing to compare).
    """
    cur_artist, cur_album, _ = current_metadata(items)

    # Running total and the largest value the total could reach.
    total = 0.0
    total_max = 0.0

    # Album-level artist is skipped for various-artists releases; a
    # missing current value is compared as the empty string.
    if not album_info.va:
        total += string_dist(cur_artist or '', album_info.artist) * \
                 ARTIST_WEIGHT
        total_max += ARTIST_WEIGHT
    total += string_dist(cur_album or '', album_info.album) * ALBUM_WEIGHT
    total_max += ALBUM_WEIGHT

    # Per-track distances; track numbers are 1-based and the track
    # artist is only compared for various-artists releases.
    for pos, pair in enumerate(zip(items, album_info.tracks)):
        item, track_info = pair
        total += track_distance(item, track_info, pos + 1, album_info.va) * \
                 TRACK_WEIGHT
        total_max += TRACK_WEIGHT

    # Contributions from plugins.
    plugin_dist, plugin_max = plugins.album_distance(items, album_info)
    total += plugin_dist
    total_max += plugin_max

    # Normalize, guarding against division by zero.
    if total_max == 0.0:
        return 0.0
    return total / total_max
def match_by_id(items):
    """Look up the album named by the items' MusicBrainz album IDs.

    If at least one item carries an album ID and all the IDs present
    agree, returns the result of ``hooks._album_for_id`` for that ID.
    Returns None when no item has an album ID or when the IDs
    disagree.
    """
    # Only items that actually carry an album ID take part.
    albumids = [item.mb_albumid for item in items if item.mb_albumid]
    if not albumids:
        log.debug('No album IDs found.')
        return None

    # Consensus means exactly one distinct ID among the tagged items.
    if len(set(albumids)) != 1:
        log.debug('No album ID consensus.')
        return None

    albumid = albumids[0]
    log.debug('Searching for discovered album ID: ' + albumid)
    return hooks._album_for_id(albumid)

    # TODO: In the future, at the expense of performance, we could use
    # other IDs (i.e., track and artist) in case the album tag isn't
    # present, but that event seems very unlikely.
def recommendation(results):
    """Derive a recommendation flag (RECOMMEND_STRONG,
    RECOMMEND_MEDIUM or RECOMMEND_NONE) from a list of result tuples
    sorted by distance, where each tuple starts with its distance.
    """
    # No candidates: no recommendation.
    if not results:
        return RECOMMEND_NONE

    best = results[0][0]

    # A very small distance is a strong recommendation.
    if best < STRONG_REC_THRESH:
        return RECOMMEND_STRONG

    # A lone candidate, or a reasonably small distance, is a medium
    # recommendation.
    if len(results) == 1 or best <= MEDIUM_REC_THRESH:
        return RECOMMEND_MEDIUM

    # So is a best candidate that clearly leads the runner-up.
    if results[1][0] - best >= REC_GAP_THRESH:
        return RECOMMEND_MEDIUM

    # No conclusion.
    return RECOMMEND_NONE
def validate_candidate(items, tuple_dict, info):
    """Try to add the candidate ``info`` to ``tuple_dict``, the
    dictionary of result tuples keyed by album ID.

    The candidate is dropped if its album ID is already present, if
    its track count differs from the number of items, or if the items
    cannot be ordered against its tracks. Otherwise a
    ``(distance, ordered_items, info)`` tuple is stored in
    ``tuple_dict``. Nothing is returned.
    """
    log.debug('Candidate: %s - %s' % (info.artist, info.album))

    if info.album_id in tuple_dict:
        # Already evaluated this release.
        log.debug('Duplicate.')
    elif len(items) != len(info.tracks):
        # The release must have exactly as many tracks as we have items.
        log.debug('Track count mismatch.')
    else:
        ordered = order_items(items, info.tracks)
        if ordered:
            # Record how far the candidate is from the current tags.
            dist = distance(ordered, info)
            log.debug('Success. Distance: %f' % dist)
            tuple_dict[info.album_id] = dist, ordered, info
        else:
            log.debug('Not orderable.')
def tag_album(items, timid=False, search_artist=None, search_album=None,
              search_id=None):
    """Infer tags for a set of items that make up one album.

    Returns a 4-tuple:
        - The current artist.
        - The current album.
        - A list of (distance, items, info) tuples, where info holds
          the inferred tags and items is a reordered version of the
          input list. On the full-search path the list is sorted by
          distance (best match first).
        - A recommendation: RECOMMEND_STRONG, RECOMMEND_MEDIUM or
          RECOMMEND_NONE, i.e. the first candidate is very likely,
          somewhat likely, or no conclusion could be reached.

    If search_artist and search_album, or search_id, are provided they
    replace the current metadata as search terms. May raise an
    AutotagError if existing metadata is insufficient.
    """
    # Get current metadata.
    cur_artist, cur_album, artist_consensus = current_metadata(items)
    log.debug('Tagging %s - %s' % (cur_artist, cur_album))

    # Result tuples gathered so far, keyed by MB album ID.
    matches = {}

    # Stage 1: look the album up by ID -- either the one the caller
    # asked for or one discovered from the items themselves.
    if search_id:
        log.debug('Searching for album ID: ' + search_id)
        id_info = hooks._album_for_id(search_id)
    else:
        id_info = match_by_id(items)

    if id_info:
        validate_candidate(items, matches, id_info)
        rec = recommendation(matches.values())
        log.debug('Album ID match recommendation is ' + str(rec))
        # A very good ID match is returned immediately (unless timid);
        # anything weaker competes against metadata-based matches.
        if matches and not timid and rec == RECOMMEND_STRONG:
            log.debug('ID match.')
            return cur_artist, cur_album, matches.values(), rec

    # An explicit ID search never continues to the metadata search.
    # (`rec` is only read when `matches` is non-empty, which implies
    # the branch above assigned it.)
    if search_id is not None:
        if not matches:
            return cur_artist, cur_album, [], RECOMMEND_NONE
        return cur_artist, cur_album, matches.values(), rec

    # Stage 2: metadata search. Without explicit search terms, fall
    # back to the current metadata.
    if not search_artist or not search_album:
        search_artist, search_album = cur_artist, cur_album
    log.debug(u'Search terms: %s - %s' % (search_artist, search_album))

    # Is this album likely to be a "various artist" release?
    va_likely = (not artist_consensus
                 or search_artist.lower() in VA_ARTISTS
                 or any(item.comp for item in items))
    log.debug(u'Album might be VA: %s' % str(va_likely))

    # Ask the data sources for candidates and score each of them.
    candidates = hooks._album_candidates(items, search_artist, search_album,
                                         va_likely)
    log.debug(u'Evaluating %i candidates.' % len(candidates))
    for info in candidates:
        validate_candidate(items, matches, info)

    # Best (smallest distance) first.
    ranked = sorted(matches.values())
    return cur_artist, cur_album, ranked, recommendation(ranked)
def tag_item(item, timid=False, search_artist=None, search_title=None,
             search_id=None):
    """Try to find metadata for a single track.

    Returns a `(candidates, recommendation)` pair in which
    `candidates` is a list of `(distance, track_info)` pairs.
    `search_artist` and `search_title` override the item's current
    metadata as search terms; `search_id` overrides the item's
    MusicBrainz track ID and restricts the search to that ID.
    """
    candidates = []

    # First, try matching by MusicBrainz ID.
    trackid = search_id or item.mb_trackid
    if trackid:
        log.debug('Searching for track ID: ' + trackid)
        id_match = hooks._track_for_id(trackid)
        if id_match:
            id_dist = track_distance(item, id_match, incl_artist=True)
            candidates.append((id_dist, id_match))
            # A good enough match ends the search (unless timid).
            rec = recommendation(candidates)
            if rec == RECOMMEND_STRONG and not timid:
                log.debug('Track ID match.')
                return candidates, rec

    # When searching by ID, stop here whatever the outcome. (`rec` is
    # only read when a candidate exists, so it has been assigned.)
    if search_id is not None:
        if not candidates:
            return [], RECOMMEND_NONE
        return candidates, rec

    # Search terms: fall back to the item's own tags.
    if not search_artist or not search_title:
        search_artist, search_title = item.artist, item.title
    log.debug(u'Item search terms: %s - %s' % (search_artist, search_title))

    # Fetch candidate metadata and score each entry.
    candidates.extend(
        (track_distance(item, found, incl_artist=True), found)
        for found in hooks._item_candidates(item, search_artist,
                                            search_title)
    )

    # Sort by distance and return with a recommendation.
    log.debug('Found %i candidates.' % len(candidates))
    candidates.sort()
    return candidates, recommendation(candidates)
+98 -260
View File
@@ -13,24 +13,17 @@
# included in all copies or substantial portions of the Software.
"""Searches for albums in the MusicBrainz database.
This is a thin layer over the official `python-musicbrainz2` module. It
abstracts away that module's object model, the server's Lucene query
syntax, and other uninteresting parts of using musicbrainz2. The
principal interface is the function `match_album`.
"""
from __future__ import with_statement # for Python 2.5
import re
import time
import logging
import lib.musicbrainz2.webservice as mbws
from lib.musicbrainz2.model import Release
from threading import Lock
from lib.musicbrainz2.model import VARIOUS_ARTISTS_ID
SEARCH_LIMIT = 10
VARIOUS_ARTISTS_ID = VARIOUS_ARTISTS_ID.rsplit('/', 1)[1]
from . import musicbrainz3
import lib.beets.autotag.hooks
import lib.beets
SEARCH_LIMIT = 5
VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377'
musicbrainz3._useragent = 'beets/%s' % lib.beets.__version__
class ServerBusyError(Exception): pass
class BadResponseError(Exception): pass
@@ -42,242 +35,84 @@ SPECIAL_CASE_ARTISTS = {
'!!!': 'f26c72d3-e52c-467b-b651-679c73d8e1a7',
}
RELEASE_TYPES = [
Release.TYPE_ALBUM,
Release.TYPE_SINGLE,
Release.TYPE_EP,
Release.TYPE_COMPILATION,
Release.TYPE_SOUNDTRACK,
Release.TYPE_SPOKENWORD,
Release.TYPE_INTERVIEW,
Release.TYPE_AUDIOBOOK,
Release.TYPE_LIVE,
Release.TYPE_REMIX,
Release.TYPE_OTHER
]
RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups',
'labels']
TRACK_INCLUDES = ['artists']
RELEASE_INCLUDES = mbws.ReleaseIncludes(artist=True, tracks=True,
releaseEvents=True, labels=True,
releaseGroup=True)
TRACK_INCLUDES = mbws.TrackIncludes(artist=True)
# MusicBrainz requires that a client does not query the server more
# than once a second. This function enforces that limit using a
# module-global variable to keep track of the last time a query was
# sent.
MAX_QUERY_RETRY = 8
QUERY_WAIT_TIME = 1.0
last_query_time = 0.0
mb_lock = Lock()
def _query_wrap(fun, *args, **kwargs):
"""Wait until at least `QUERY_WAIT_TIME` seconds have passed since
the last invocation of this function. Then call
fun(*args, **kwargs). If it fails due to a "server busy" message,
then try again. Tries up to `MAX_QUERY_RETRY` times before
giving up.
def _adapt_criteria(criteria):
"""Special-case artists in a criteria dictionary before it is passed
to the MusicBrainz search server. The dictionary supplied is
mutated; nothing is returned.
"""
with mb_lock:
global last_query_time
for i in range(MAX_QUERY_RETRY):
since_last_query = time.time() - last_query_time
if since_last_query < QUERY_WAIT_TIME:
time.sleep(QUERY_WAIT_TIME - since_last_query)
last_query_time = time.time()
try:
# Try the function.
res = fun(*args, **kwargs)
except mbws.WebServiceError, e:
# Server busy. Retry.
message = str(e.reason)
for errnum in (503, 504):
if 'Error %i' % errnum in message:
break
else:
# This is not the error we're looking for.
raise
except mbws.ConnectionError:
# Typically a timeout.
pass
except mbws.ResponseError, exc:
# Malformed response from server.
log.error('Bad response from MusicBrainz: ' + str(exc))
raise BadResponseError()
else:
# Success. Return the result.
return res
# Gave up.
raise ServerBusyError()
# FIXME exponential backoff?
def get_releases(**params):
"""Given a list of parameters to ReleaseFilter, executes the
query and yields release dicts (complete with tracks).
"""
# Replace special cases.
if 'artistName' in params:
artist = params['artistName']
if artist in SPECIAL_CASE_ARTISTS:
del params['artistName']
params['artistId'] = SPECIAL_CASE_ARTISTS[artist]
# Issue query.
filt = mbws.ReleaseFilter(**params)
try:
results = _query_wrap(mbws.Query().getReleases, filter=filt)
except BadResponseError:
results = ()
# Construct results.
for result in results:
release = result.release
tracks, _ = release_info(release.id)
yield release_dict(release, tracks)
def release_info(release_id):
"""Given a MusicBrainz release ID, fetch a list of tracks on the
release and the release group ID. If the release is not found,
returns None.
"""
try:
release = _query_wrap(mbws.Query().getReleaseById, release_id,
RELEASE_INCLUDES)
except BadResponseError:
release = None
if release:
return release.getTracks(), release.getReleaseGroup().getId()
else:
return None
def _lucene_escape(text):
"""Escapes a string so it may be used verbatim in a Lucene query
string.
"""
# Regex stolen from MusicBrainz Picard.
out = re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', text)
return out.replace('\x00', '')
def _lucene_query(criteria):
"""Given a dictionary containing search criteria, produce a string
that may be used as a MusicBrainz search query.
"""
query_parts = []
for name, value in criteria.items():
value = _lucene_escape(value).strip().lower()
if value:
query_parts.append(u'%s:(%s)' % (name, value))
return u' '.join(query_parts)
def find_releases(criteria, limit=SEARCH_LIMIT):
"""Get a list of release dictionaries from the MusicBrainz
database that match `criteria`. The latter is a dictionary whose
keys are MusicBrainz field names and whose values are search terms
for those fields.
The field names are from MusicBrainz's Lucene query syntax, which
is detailed here:
http://wiki.musicbrainz.org/Text_Search_Syntax
"""
# Replace special cases.
if 'artist' in criteria:
artist = criteria['artist']
if artist in SPECIAL_CASE_ARTISTS:
del criteria['artist']
criteria['arid'] = SPECIAL_CASE_ARTISTS[artist]
# Build the filter and send the query.
if any(criteria.itervalues()):
query = _lucene_query(criteria)
log.debug('album query: %s' % query)
return get_releases(limit=limit, query=query)
for artist, artist_id in SPECIAL_CASE_ARTISTS.items():
if criteria['artist'] == artist:
criteria['arid'] = artist_id
del criteria['artist']
break
def find_tracks(criteria, limit=SEARCH_LIMIT):
"""Get a sequence of track dictionaries from MusicBrainz that match
`criteria`, a search term dictionary similar to the one passed to
`find_releases`.
def track_info(recording):
"""Translates a MusicBrainz recording result dictionary into a beets
``TrackInfo`` object.
"""
if any(criteria.itervalues()):
query = _lucene_query(criteria)
log.debug('track query: %s' % query)
filt = mbws.TrackFilter(limit=limit, query=query)
try:
results = _query_wrap(mbws.Query().getTracks, filter=filt)
except BadResponseError:
results = ()
for result in results:
track = result.track
yield track_dict(track)
info = lib.beets.autotag.hooks.TrackInfo(recording['title'],
recording['id'])
def track_dict(track):
"""Produces a dictionary summarizing a MusicBrainz `Track` object.
"""
t = {'title': track.title,
'id': track.id.rsplit('/', 1)[1]}
if track.artist is not None:
# Track artists will only be present for releases with
# multiple artists.
t['artist'] = track.artist.name
t['artist_id'] = track.artist.id.rsplit('/', 1)[1]
if track.duration is not None:
# Duration not always present.
t['length'] = track.duration/(1000.0)
return t
if 'artist-credit' in recording: # XXX: when is this not included?
artist = recording['artist-credit'][0]['artist']
info.artist = artist['name']
info.artist_id = artist['id']
def release_dict(release, tracks=None):
"""Takes a MusicBrainz `Release` object and returns a dictionary
containing the interesting data about that release. A list of
`Track` objects may also be provided as `tracks`; they are then
included in the resulting dictionary.
if recording.get('length'):
info.length = int(recording['length'])/(1000.0)
return info
def album_info(release):
"""Takes a MusicBrainz release result dictionary and returns a beets
AlbumInfo object containing the interesting data about that release.
"""
# Basic info.
out = {'album': release.title,
'album_id': release.id.rsplit('/', 1)[1],
'artist': release.artist.name,
'artist_id': release.artist.id.rsplit('/', 1)[1],
'asin': release.asin,
'albumtype': '',
}
out['va'] = out['artist_id'] == VARIOUS_ARTISTS_ID
artist = release['artist-credit'][0]['artist']
tracks = []
for medium in release['medium-list']:
tracks.extend(i['recording'] for i in medium['track-list'])
info = lib.beets.autotag.hooks.AlbumInfo(
release['title'],
release['id'],
artist['name'],
artist['id'],
[track_info(track) for track in tracks],
)
info.va = info.artist_id == VARIOUS_ARTISTS_ID
if 'asin' in release:
info.asin = release['asin']
# Release type not always populated.
for releasetype in release.types:
if releasetype in RELEASE_TYPES:
out['albumtype'] = releasetype.split('#')[1].lower()
break
reltype = release['release-group']['type']
if reltype:
info.albumtype = reltype.lower()
# Release date and label.
try:
event = release.getEarliestReleaseEvent()
except:
# The python-musicbrainz2 module has a bug that will raise an
# exception when there is no release date to be found. In this
# case, we just skip adding a release date to the dict.
pass
else:
if event:
# Release date.
date_str = event.getDate()
if date_str:
date_parts = date_str.split('-')
for key in ('year', 'month', 'day'):
if date_parts:
out[key] = int(date_parts.pop(0))
# Release date.
if 'date' in release: # XXX: when is this not included?
date_str = release['date']
if date_str:
date_parts = date_str.split('-')
for key in ('year', 'month', 'day'):
if date_parts:
setattr(info, key, int(date_parts.pop(0)))
# Label name.
label = event.getLabel()
if label:
out['label'] = label.getName()
# Label name.
if release.get('label-info-list'):
label = release['label-info-list'][0]['label']['name']
if label != '[no label]':
info.label = label
# Tracks.
if tracks is not None:
out['tracks'] = map(track_dict, tracks)
return out
return info
def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT):
"""Searches for a single album ("release" in MusicBrainz parlance)
and returns an iterator over dictionaries of information (as
returned by `release_dict`).
and returns an iterator over AlbumInfo objects.
The query consists of an artist name, an album name, and,
optionally, a number of tracks on the album.
@@ -292,42 +127,45 @@ def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT):
if tracks is not None:
criteria['tracks'] = str(tracks)
# Search for the release.
return find_releases(criteria)
_adapt_criteria(criteria)
res = musicbrainz3.release_search(limit=limit, **criteria)
for release in res['release-list']:
# The search result is missing some data (namely, the tracks),
# so we just use the ID and fetch the rest of the information.
yield album_for_id(release['id'])
def match_track(artist, title):
"""Searches for a single track and returns an iterable of track
info dictionaries (as returned by `track_dict`).
def match_track(artist, title, limit=SEARCH_LIMIT):
"""Searches for a single track and returns an iterable of TrackInfo
objects.
"""
return find_tracks({
criteria = {
'artist': artist,
'track': title,
})
'recording': title,
}
_adapt_criteria(criteria)
res = musicbrainz3.recording_search(limit=limit, **criteria)
for recording in res['recording-list']:
yield track_info(recording)
def album_for_id(albumid):
"""Fetches an album by its MusicBrainz ID and returns an
information dictionary. If no match is found, returns None.
"""Fetches an album by its MusicBrainz ID and returns an AlbumInfo
object or None if the album is not found.
"""
query = mbws.Query()
try:
album = _query_wrap(query.getReleaseById, albumid, RELEASE_INCLUDES)
except BadResponseError:
res = musicbrainz3.get_release_by_id(albumid, RELEASE_INCLUDES)
except musicbrainz3.ResponseError:
log.debug('Album ID match failed.')
return None
except (mbws.ResourceNotFoundError, mbws.RequestError), exc:
log.debug('Album ID match failed: ' + str(exc))
return None
return release_dict(album, album.tracks)
return album_info(res['release'])
def track_for_id(trackid):
"""Fetches a track by its MusicBrainz ID. Returns a track info
dictionary or None if no track is found.
"""Fetches a track by its MusicBrainz ID. Returns a TrackInfo object
or None if no track is found.
"""
query = mbws.Query()
try:
track = _query_wrap(query.getTrackById, trackid, TRACK_INCLUDES)
except BadResponseError:
res = musicbrainz3.get_recording_by_id(trackid, TRACK_INCLUDES)
except musicbrainz3.ResponseError:
log.debug('Track ID match failed.')
return None
except (mbws.ResourceNotFoundError, mbws.RequestError), exc:
log.debug('Track ID match failed: ' + str(exc))
return None
return track_dict(track)
return track_info(res['recording'])
+171
View File
@@ -0,0 +1,171 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Searches for albums in the MusicBrainz database.
"""
import logging
from . import musicbrainz3
import beets.autotag.hooks
import beets
# Default for the `limit` argument of match_album and match_track.
SEARCH_LIMIT = 5
# Artist ID used for "Various Artists": match_album searches by it when
# no artist is given and album_info compares against it to set `va`.
VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377'
# Override the vendored client's default user-agent string with one
# that names beets and its version.
musicbrainz3._useragent = 'beets/%s' % beets.__version__
# NOTE(review): nothing in this module raises either exception below;
# presumably they are kept for callers that still reference them --
# confirm before removing.
class ServerBusyError(Exception): pass
class BadResponseError(Exception): pass
# Logger used for the debug messages emitted by this module.
log = logging.getLogger('beets')
# We hard-code IDs for artists that can't easily be searched for.
SPECIAL_CASE_ARTISTS = {
'!!!': 'f26c72d3-e52c-467b-b651-679c73d8e1a7',
}
# Include lists passed to musicbrainz3.get_release_by_id (album_for_id)
# and musicbrainz3.get_recording_by_id (track_for_id) respectively.
RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups',
'labels']
TRACK_INCLUDES = ['artists']
def _adapt_criteria(criteria):
"""Special-case artists in a criteria dictionary before it is passed
to the MusicBrainz search server. The dictionary supplied is
mutated; nothing is returned.
"""
if 'artist' in criteria:
for artist, artist_id in SPECIAL_CASE_ARTISTS.items():
if criteria['artist'] == artist:
criteria['arid'] = artist_id
del criteria['artist']
break
def track_info(recording):
    """Build a beets ``TrackInfo`` object from a MusicBrainz recording
    result dictionary.

    The title and ID are required keys. The artist name/ID and the
    length are only set when the dictionary provides them.
    """
    info = beets.autotag.hooks.TrackInfo(recording['title'], recording['id'])

    # Only the first entry of the artist credit is used.
    if 'artist-credit' in recording:  # XXX: when is this not included?
        first_artist = recording['artist-credit'][0]['artist']
        info.artist = first_artist['name']
        info.artist_id = first_artist['id']

    # Missing, empty or zero lengths are left unset. The value is
    # divided by 1000 (presumably milliseconds to seconds -- confirm).
    raw_length = recording.get('length')
    if raw_length:
        info.length = int(raw_length) / 1000.0

    return info
def album_info(release):
    """Build a beets AlbumInfo object from a MusicBrainz release result
    dictionary.

    Required keys: 'title', 'id', 'artist-credit', 'medium-list' and
    'release-group'. The ASIN, release type, release date and label
    are optional and are only set on the result when present.
    """
    # Basic info. Only the first entry of the artist credit is used.
    artist = release['artist-credit'][0]['artist']

    # Flatten the recordings of every medium into one track list.
    recordings = []
    for medium in release['medium-list']:
        recordings.extend(entry['recording'] for entry in medium['track-list'])

    info = beets.autotag.hooks.AlbumInfo(
        release['title'],
        release['id'],
        artist['name'],
        artist['id'],
        [track_info(recording) for recording in recordings],
    )
    info.va = info.artist_id == VARIOUS_ARTISTS_ID
    if 'asin' in release:
        info.asin = release['asin']

    # Release type not always populated: use .get() so that a release
    # group without a 'type' entry is skipped instead of raising
    # KeyError.
    reltype = release['release-group'].get('type')
    if reltype:
        info.albumtype = reltype.lower()

    # Release date: "YYYY", "YYYY-MM" or "YYYY-MM-DD". zip() stops at
    # the shorter sequence, so only the components present are set.
    date_str = release.get('date')  # XXX: when is this not included?
    if date_str:
        for key, part in zip(('year', 'month', 'day'), date_str.split('-')):
            setattr(info, key, int(part))

    # Label name: first label only, ignoring the "[no label]" placeholder.
    if release.get('label-info-list'):
        label = release['label-info-list'][0]['label']['name']
        if label != '[no label]':
            info.label = label

    return info
def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT):
    """Search for a single album ("release" in MusicBrainz parlance)
    and return an iterator over AlbumInfo objects.

    The query consists of an artist name, an album name, and,
    optionally, a number of tracks on the album. Passing `artist` as
    None searches under the Various Artists ID instead. At most
    `limit` search results are requested.

    Search hits whose follow-up lookup fails are skipped, so the
    iterator never yields None.
    """
    # Build search criteria.
    criteria = {'release': album}
    if artist is not None:
        criteria['artist'] = artist
    else:
        # Various Artists search.
        criteria['arid'] = VARIOUS_ARTISTS_ID
    if tracks is not None:
        criteria['tracks'] = str(tracks)

    _adapt_criteria(criteria)
    res = musicbrainz3.release_search(limit=limit, **criteria)
    for release in res['release-list']:
        # The search result is missing some data (namely, the tracks),
        # so we just use the ID and fetch the rest of the information.
        albuminfo = album_for_id(release['id'])
        # album_for_id returns None when the lookup fails; passing that
        # on would crash consumers that read attributes of the result.
        if albuminfo is not None:
            yield albuminfo
def match_track(artist, title, limit=SEARCH_LIMIT):
    """Search for a single track by artist and title and return an
    iterable of TrackInfo objects (at most `limit` results are
    requested from the server).
    """
    criteria = dict(artist=artist, recording=title)
    # Swap in the hard-coded ID for artists that can't be searched by name.
    _adapt_criteria(criteria)

    found = musicbrainz3.recording_search(limit=limit, **criteria)
    for recording in found['recording-list']:
        yield track_info(recording)
def album_for_id(albumid):
    """Fetch an album by its MusicBrainz ID.

    Returns an AlbumInfo object, or None when the lookup raises
    `musicbrainz3.ResponseError` (e.g. the album is not found).
    """
    try:
        response = musicbrainz3.get_release_by_id(albumid, RELEASE_INCLUDES)
    except musicbrainz3.ResponseError:
        log.debug('Album ID match failed.')
        return None
    else:
        return album_info(response['release'])
def track_for_id(trackid):
    """Fetch a track by its MusicBrainz ID.

    Returns a TrackInfo object, or None when the lookup raises
    `musicbrainz3.ResponseError` (e.g. no such track).
    """
    try:
        response = musicbrainz3.get_recording_by_id(trackid, TRACK_INCLUDES)
    except musicbrainz3.ResponseError:
        log.debug('Track ID match failed.')
        return None
    else:
        return track_info(response['recording'])
+744
View File
@@ -0,0 +1,744 @@
# This is a copy of changeset e60b5af77 from the python-musicbrainz-ngs
# project:
# https://github.com/alastair/python-musicbrainz-ngs/
# MIT license; by Alastair Porter and Adrian Sampson
import urlparse
import urllib2
import urllib
import re
import threading
import time
import logging
import httplib
import xml.etree.ElementTree as etree
from . import mbxml
# Default user-agent string. It is a plain module global so an
# embedding application can overwrite it.
# NOTE(review): the code that sends it is outside this view -- confirm
# it is read at request time rather than at import time.
_useragent = "pythonmusicbrainzngs-0.1"
# Module logger; _safe_open reports retries and HTTP errors through it.
_log = logging.getLogger("python-musicbrainz-ngs")
# Constants for validation.
# Maps an entity type to the include names accepted for it.
# _check_includes() looks the entity up here and raises
# InvalidIncludeError for any include that is not listed.
VALID_INCLUDES = {
'artist': [
"recordings", "releases", "release-groups", "works", # Subqueries
"various-artists", "discids", "media",
"aliases", "tags", "user-tags", "ratings", "user-ratings", # misc
"artist-rels", "label-rels", "recording-rels", "release-rels",
"release-group-rels", "url-rels", "work-rels"
],
'label': [
"releases", # Subqueries
"discids", "media",
"aliases", "tags", "user-tags", "ratings", "user-ratings", # misc
"artist-rels", "label-rels", "recording-rels", "release-rels",
"release-group-rels", "url-rels", "work-rels"
],
'recording': [
"artists", "releases", # Subqueries
"discids", "media", "artist-credits",
"tags", "user-tags", "ratings", "user-ratings", # misc
"artist-rels", "label-rels", "recording-rels", "release-rels",
"release-group-rels", "url-rels", "work-rels"
],
'release': [
"artists", "labels", "recordings", "release-groups", "media",
"artist-credits", "discids", "puids", "echoprints", "isrcs",
"artist-rels", "label-rels", "recording-rels", "release-rels",
"release-group-rels", "url-rels", "work-rels", "recording-level-rels",
"work-level-rels"
],
'release-group': [
"artists", "releases", "discids", "media",
"artist-credits", "tags", "user-tags", "ratings", "user-ratings", # misc
"artist-rels", "label-rels", "recording-rels", "release-rels",
"release-group-rels", "url-rels", "work-rels"
],
'work': [
"artists", # Subqueries
"aliases", "tags", "user-tags", "ratings", "user-ratings", # misc
"artist-rels", "label-rels", "recording-rels", "release-rels",
"release-group-rels", "url-rels", "work-rels"
],
'discid': [
"artists", "labels", "recordings", "release-groups", "puids",
"echoprints", "isrcs"
],
'echoprint': ["artists", "releases"],
'puid': ["artists", "releases", "puids", "echoprints", "isrcs"],
'isrc': ["artists", "releases", "puids", "echoprints", "isrcs"],
'iswc': ["artists"],
}
# Values accepted for the `release_type` filter; checked by
# _check_filter_and_make_params().
VALID_RELEASE_TYPES = [
"nat", "album", "single", "ep", "compilation", "soundtrack", "spokenword",
"interview", "audiobook", "live", "remix", "other"
]
# Values accepted for the `release_status` filter; checked by
# _check_filter_and_make_params().
VALID_RELEASE_STATUSES = ["official", "promotion", "bootleg", "pseudo-release"]
# Maps an entity type to a list of search field names.
# NOTE(review): the code that consumes this table is outside this view;
# presumably the search functions validate their keyword arguments
# against it -- confirm.
# NOTE(review): 'date' appears twice in the 'release' list.
VALID_SEARCH_FIELDS = {
'artist': [
'arid', 'artist', 'sortname', 'type', 'begin', 'end', 'comment',
'alias', 'country', 'gender', 'tag'
],
'release-group': [
'rgid', 'releasegroup', 'reid', 'release', 'arid', 'artist',
'artistname', 'creditname', 'type', 'tag'
],
'release': [
'reid', 'release', 'arid', 'artist', 'artistname', 'creditname',
'type', 'status', 'tracks', 'tracksmedium', 'discids',
'discidsmedium', 'mediums', 'date', 'asin', 'lang', 'script',
'country', 'date', 'label', 'catno', 'barcode', 'puid'
],
'recording': [
'rid', 'recording', 'isrc', 'arid', 'artist', 'artistname',
'creditname', 'reid', 'release', 'type', 'status', 'tracks',
'tracksrelease', 'dur', 'qdur', 'tnum', 'position', 'tag'
],
'label': [
'laid', 'label', 'sortname', 'type', 'code', 'country', 'begin',
'end', 'comment', 'alias', 'tag'
],
'work': [
'wid', 'work', 'iswc', 'type', 'arid', 'artist', 'alias', 'tag'
],
}
# Exceptions.
class MusicBrainzError(Exception):
    """Base class for all exceptions related to MusicBrainz."""


class UsageError(MusicBrainzError):
    """Error related to misuse of the module API."""


class InvalidSearchFieldError(UsageError):
    """A search was requested on a field that is not valid."""


class InvalidIncludeError(UsageError):
    """An include name is not valid for the requested entity.

    `msg` is the text returned by ``str()``; `reason` carries optional
    extra detail and is only stored as an attribute.
    """

    def __init__(self, msg='Invalid Includes', reason=None):
        # Hand the message -- not `self` -- to the base class. Storing
        # the exception inside its own `args` makes repr() recurse
        # without bound.
        super(InvalidIncludeError, self).__init__(msg)
        self.msg = msg
        self.reason = reason

    def __str__(self):
        return self.msg


class InvalidFilterError(UsageError):
    """A release status/type filter value is invalid, or the filter
    cannot be combined with the given includes.

    `msg` is the text returned by ``str()``; `reason` carries optional
    extra detail and is only stored as an attribute.
    """

    # NOTE(review): the default message says "Includes" although this
    # is the filter error -- looks like a copy-paste leftover. Left
    # unchanged because the default is part of the signature.
    def __init__(self, msg='Invalid Includes', reason=None):
        # See InvalidIncludeError.__init__ for why `msg` is passed on.
        super(InvalidFilterError, self).__init__(msg)
        self.msg = msg
        self.reason = reason

    def __str__(self):
        return self.msg


class WebServiceError(MusicBrainzError):
    """Error related to MusicBrainz API requests."""

    def __init__(self, message=None, cause=None):
        """Pass ``cause`` if this exception was caused by another
        exception.
        """
        self.message = message
        self.cause = cause

    def __str__(self):
        # "<message>, caused by: <cause>", or only the second half when
        # no message was given.
        if self.message:
            msg = "%s, " % self.message
        else:
            msg = ""
        msg += "caused by: %s" % str(self.cause)
        return msg


class NetworkError(WebServiceError):
    """Problem communicating with the MB server."""


class ResponseError(WebServiceError):
    """Bad response sent by the MB server."""
# Helpers for validating and formatting allowed sets.
def _check_includes_impl(includes, valid_includes):
for i in includes:
if i not in valid_includes:
raise InvalidIncludeError("Bad includes", "%s is not a valid include" % i)
def _check_includes(entity, inc):
    """Validate the include list `inc` against the includes that
    VALID_INCLUDES allows for `entity`.

    Raises InvalidIncludeError for an unknown include, and KeyError
    when `entity` itself is not a key of VALID_INCLUDES.
    """
    allowed = VALID_INCLUDES[entity]
    _check_includes_impl(inc, allowed)
def _check_filter(values, valid):
for v in values:
if v not in valid:
raise InvalidFilterError(v)
def _check_filter_and_make_params(includes, release_status=[], release_type=[]):
    """Validate release status/type filters and turn them into a
    params dict that can be passed to _do_mb_query.

    Each filter may be a single string or a list of strings. The
    values must come from VALID_RELEASE_STATUSES and
    VALID_RELEASE_TYPES, and the filters must be usable with the given
    `includes`; otherwise InvalidFilterError is raised. In the result,
    multiple values are joined with "|" under the keys "status" and
    "type"; an empty filter contributes no key.
    """
    # The list defaults are never mutated here (only rebound), so
    # sharing them between calls is harmless.

    # A bare string is shorthand for a one-element list.
    if isinstance(release_status, basestring):
        release_status = [release_status]
    if isinstance(release_type, basestring):
        release_type = [release_type]

    _check_filter(release_status, VALID_RELEASE_STATUSES)
    _check_filter(release_type, VALID_RELEASE_TYPES)

    # A status filter needs the "releases" include; a type filter
    # needs "release-groups" or "releases".
    if release_status and "releases" not in includes:
        raise InvalidFilterError("Can't have a status with no release include")
    if release_type and not ("release-groups" in includes or
                             "releases" in includes):
        raise InvalidFilterError("Can't have a release type with no "
                                 "release-group include")

    # Build parameters.
    params = {}
    for key, values in (("status", release_status), ("type", release_type)):
        if len(values):
            params[key] = "|".join(values)
    return params
# Global authentication and endpoint details.
# Credentials for requests that need authentication; set through auth().
user = password = ""
# Server name the web service requests are addressed to.
hostname = "musicbrainz.org"
# Client identifier; set through set_client(). The empty string means
# "not set" (requests that require a client raise UsageError then).
_client = ""
def auth(u, p):
    """Store the username `u` and password `p` in the module globals
    `user` and `password`, for use by later MusicBrainz XML API
    queries that require authentication.
    """
    global user, password
    user, password = u, p
def set_client(c):
    """Store `c` in the module global `_client` as the client name to
    send with requests. This must be done before any data submissions
    are made.
    """
    global _client
    _client = c
# Rate limiting.
# At most `limit_requests` calls are allowed per `limit_interval`
# seconds. Both are read by the _rate_limit decorator and changed
# through set_rate_limit().
limit_interval = 1.0
limit_requests = 1
def set_rate_limit(new_interval=1.0, new_requests=1):
    """Set the rate limiting behavior of the module. Must be invoked
    before the first Web service call. Specify the number of requests
    (`new_requests`) that may be made per given interval
    (`new_interval`, in seconds).

    Raises ValueError if either value is not positive: the limiter
    divides by the interval, and with a request budget of zero or less
    it would wait forever for a slot that never becomes available.
    """
    global limit_interval
    global limit_requests

    # Validate before touching the globals so a rejected call leaves
    # the previous settings in place.
    if new_interval <= 0.0:
        raise ValueError("new_interval must be greater than zero")
    if new_requests <= 0:
        raise ValueError("new_requests must be greater than zero")

    limit_interval = new_interval
    limit_requests = new_requests
class _rate_limit(object):
"""A decorator that limits the rate at which the function may be
called. The rate is controlled by the `limit_interval` and
`limit_requests` global variables. The limiting is thread-safe;
only one thread may be in the function at a time (acts like a
monitor in this sense). The globals must be set before the first
call to the limited function.
"""
def __init__(self, fun):
self.fun = fun
self.last_call = 0.0
self.lock = threading.Lock()
self.remaining_requests = None # Set on first invocation.
def _update_remaining(self):
"""Update remaining requests based on the elapsed time since
they were last calculated.
"""
# On first invocation, we have the maximum number of requests
# available.
if self.remaining_requests is None:
self.remaining_requests = float(limit_requests)
else:
since_last_call = time.time() - self.last_call
self.remaining_requests += since_last_call * \
(limit_requests / limit_interval)
self.remaining_requests = min(self.remaining_requests,
float(limit_requests))
self.last_call = time.time()
def __call__(self, *args, **kwargs):
with self.lock:
self._update_remaining()
# Delay if necessary.
while self.remaining_requests < 0.999:
time.sleep((1.0 - self.remaining_requests) *
(limit_requests / limit_interval))
self._update_remaining()
# Call the original function, "paying" for this call.
self.remaining_requests -= 1.0
return self.fun(*args, **kwargs)
# Generic support for making HTTP requests.
# From pymb2
class _RedirectPasswordMgr(urllib2.HTTPPasswordMgr):
def __init__(self):
self._realms = { }
def find_user_password(self, realm, uri):
# ignoring the uri parameter intentionally
try:
return self._realms[realm]
except KeyError:
return (None, None)
def add_password(self, realm, uri, username, password):
# ignoring the uri parameter intentionally
self._realms[realm] = (username, password)
class _DigestAuthHandler(urllib2.HTTPDigestAuthHandler):
def get_authorization (self, req, chal):
qop = chal.get ('qop', None)
if qop and ',' in qop and 'auth' in qop.split (','):
chal['qop'] = 'auth'
return urllib2.HTTPDigestAuthHandler.get_authorization (self, req, chal)
class _MusicbrainzHttpRequest(urllib2.Request):
""" A custom request handler that allows DELETE and PUT"""
def __init__(self, method, url, data=None):
urllib2.Request.__init__(self, url, data)
allowed_m = ["GET", "POST", "DELETE", "PUT"]
if method not in allowed_m:
raise ValueError("invalid method: %s" % method)
self.method = method
def get_method(self):
return self.method
# Core (internal) functions for calling the MB API.
def _safe_open(opener, req, body=None, max_retries=8, retry_delay_delta=2.0):
"""Open an HTTP request with a given URL opener and (optionally) a
request body. Transient errors lead to retries. Permanent errors
and repeated errors are translated into a small set of handleable
exceptions. Returns a file-like object.
"""
last_exc = None
for retry_num in range(max_retries):
if retry_num: # Not the first try: delay an increasing amount.
_log.debug("retrying after delay (#%i)" % retry_num)
time.sleep(retry_num * retry_delay_delta)
try:
if body:
f = opener.open(req, body)
else:
f = opener.open(req)
except urllib2.HTTPError, exc:
if exc.code in (400, 404):
# Bad request, not found, etc.
raise ResponseError(cause=exc)
elif exc.code in (503, 502, 500):
# Rate limiting, internal overloading...
_log.debug("HTTP error %i" % exc.code)
else:
# Other, unknown error. Should handle more cases, but
# retrying for now.
_log.debug("unknown HTTP error %i" % exc.code)
last_exc = exc
except httplib.BadStatusLine, exc:
_log.debug("bad status line")
last_exc = exc
except httplib.HTTPException, exc:
_log.debug("miscellaneous HTTP exception: %s" % str(exc))
last_exc = exc
except urllib2.URLError, exc:
raise NetworkError(cause=exc)
except IOError, exc:
raise NetworkError(cause=exc)
else:
# No exception! Yay!
return f
# Out of retries!
raise NetworkError("retried %i times" % max_retries, last_exc)
@_rate_limit
def _mb_request(path, method='GET', auth_required=False, client_required=False,
args=None, data=None, body=None):
"""Makes a request for the specified `path` (endpoint) on /ws/2 on
the globally-specified hostname. Parses the responses and returns
the resulting object. `auth_required` and `client_required` control
whether exceptions should be raised if the client and
username/password are left unspecified, respectively.
"""
args = dict(args) or {}
# Add client if required.
if client_required and _client == "":
raise UsageError("set a client name with "
"musicbrainz.set_client(\"client-version\")")
elif client_required:
args["client"] = _client
# Construct the full URL for the request, including hostname and
# query string.
url = urlparse.urlunparse((
'http',
hostname,
'/ws/2/%s' % path,
'',
urllib.urlencode(args),
''
))
_log.debug("%s request for %s" % (method, url))
# Set up HTTP request handler and URL opener.
httpHandler = urllib2.HTTPHandler(debuglevel=0)
handlers = [httpHandler]
opener = urllib2.build_opener(*handlers)
# Add credentials if required.
if auth_required:
if not user:
raise UsageError("authorization required; "
"use musicbrainz.auth(u, p) first")
passwordMgr = _RedirectPasswordMgr()
authHandler = _DigestAuthHandler(passwordMgr)
authHandler.add_password("musicbrainz.org", (), user, password)
handlers.append(authHandler)
# Make request.
req = _MusicbrainzHttpRequest(method, url, data)
req.add_header('User-Agent', _useragent)
if body:
req.add_header('Content-Type', 'application/xml; charset=UTF-8')
f = _safe_open(opener, req, body)
# Parse the response.
try:
return mbxml.parse_message(f)
except etree.ParseError, exc:
raise ResponseError(cause=exc)
except UnicodeError, exc:
raise ResponseError(cause=exc)
def _is_auth_required(entity, includes):
""" Some calls require authentication. This returns
True if a call does, False otherwise
"""
if "user-tags" in includes or "user-ratings" in includes:
return True
elif entity.startswith("collection"):
return True
else:
return False
def _do_mb_query(entity, id, includes=[], params={}):
    """Issue one GET request against the MusicBrainz XML API and return
    the parsed response.

    `entity` names the kind of object requested and `id` identifies it
    (an empty `id` turns the call into a search/browse). `includes` is
    validated against the entity via `_check_includes` and sent as the
    space-joined "inc" argument. `params` supplies further query
    arguments and is copied, not modified.
    """
    _check_includes(entity, includes)

    # Work on a copy so neither the caller's dict nor the shared
    # default is touched.
    query_args = dict(params)
    if len(includes) > 0:
        query_args["inc"] = " ".join(includes)

    needs_auth = _is_auth_required(entity, includes)
    endpoint = '%s/%s' % (entity, id)
    return _mb_request(endpoint, 'GET', needs_auth, args=query_args)
def _do_mb_search(entity, query='', fields={}, limit=None, offset=None):
    """Run a full-text search on the MusicBrainz search server.

    `query` is a free-form query string; `fields` maps search field
    names to values, and each name must appear in
    VALID_SEARCH_FIELDS[entity]. `limit` and `offset`, when truthy, are
    sent as strings.

    Raises InvalidSearchFieldError for an unknown field and ValueError
    when the combined query ends up empty.
    """
    # Encode the query terms as a Lucene query string.
    terms = [query.replace('\x00', '').strip()]
    for field, raw_value in fields.iteritems():
        # Ensure this is a valid search field.
        if field not in VALID_SEARCH_FIELDS[entity]:
            raise InvalidSearchFieldError(
                '%s is not a valid search field for %s' % (field, entity)
            )

        # Escape Lucene's special characters, then drop NULs.
        cleaned = re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', raw_value)
        cleaned = cleaned.replace('\x00', '').strip()
        if cleaned:
            terms.append(u'%s:(%s)' % (field, cleaned))

    lucene_query = u' '.join(terms).strip()
    if not lucene_query:
        raise ValueError('at least one query term is required')

    # Additional parameters to the search.
    search_params = {'query': lucene_query}
    if limit:
        search_params['limit'] = str(limit)
    if offset:
        search_params['offset'] = str(offset)

    return _do_mb_query(entity, '', [], search_params)
def _do_mb_delete(path):
    """Issue a DELETE for `path`; both credentials and a client name
    are required. Returns the parsed response.
    """
    return _mb_request(path, 'DELETE', auth_required=True,
                       client_required=True)
def _do_mb_put(path):
    """Issue a PUT for `path`; both credentials and a client name are
    required. Returns the parsed response.
    """
    return _mb_request(path, 'PUT', auth_required=True,
                       client_required=True)
def _do_mb_post(path, body):
    """Perform a single POST call for an endpoint with a specified
    request body. Credentials and a client name are both required.
    Returns the parsed response.
    """
    # Must be POST: this helper previously sent 'PUT', the same verb
    # as _do_mb_put, despite being the POST entry point.
    return _mb_request(path, 'POST', True, True, body=body)
# The main interface!
# Single entity by ID
def get_artist_by_id(id, includes=[], release_status=[], release_type=[]):
    """Look up the artist `id`, with optional includes and release
    status/type filters. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("artist", id, includes, filters)
def get_label_by_id(id, includes=[], release_status=[], release_type=[]):
    """Look up the label `id`, with optional includes and release
    status/type filters. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("label", id, includes, filters)
def get_recording_by_id(id, includes=[], release_status=[], release_type=[]):
    """Look up the recording `id`, with optional includes and release
    status/type filters. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("recording", id, includes, filters)
def get_release_by_id(id, includes=[], release_status=[], release_type=[]):
    """Look up the release `id`, with optional includes and release
    status/type filters. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("release", id, includes, filters)
def get_release_group_by_id(id, includes=[], release_status=[], release_type=[]):
    """Look up the release group `id`, with optional includes and
    release status/type filters. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("release-group", id, includes, filters)
def get_work_by_id(id, includes=[]):
    """Look up the work `id` with optional includes. Returns the
    parsed response.
    """
    return _do_mb_query("work", id, includes=includes)
# Searching
def artist_search(query='', limit=None, offset=None, **fields):
    """Search for artists. Give a free-form `query` string and/or
    field queries as keyword arguments. Accepted fields:
    arid, artist, sortname, type, begin, end, comment, alias, country,
    gender, tag
    """
    return _do_mb_search('artist', query=query, fields=fields,
                         limit=limit, offset=offset)
def label_search(query='', limit=None, offset=None, **fields):
    """Search for labels. Give a free-form `query` string and/or
    field queries as keyword arguments. Accepted fields:
    laid, label, sortname, type, code, country, begin, end, comment,
    alias, tag
    """
    return _do_mb_search('label', query=query, fields=fields,
                         limit=limit, offset=offset)
def recording_search(query='', limit=None, offset=None, **fields):
    """Search for recordings. Give a free-form `query` string and/or
    field queries as keyword arguments. Accepted fields:
    rid, recording, isrc, arid, artist, artistname, creditname, reid,
    release, type, status, tracks, tracksrelease, dur, qdur, tnum,
    position, tag
    """
    return _do_mb_search('recording', query=query, fields=fields,
                         limit=limit, offset=offset)
def release_search(query='', limit=None, offset=None, **fields):
    """Search for releases. Give a free-form `query` string and/or
    field queries as keyword arguments. Accepted fields:
    reid, release, arid, artist, artistname, creditname, type, status,
    tracks, tracksmedium, discids, discidsmedium, mediums, date, asin,
    lang, script, country, label, catno, barcode, puid
    """
    return _do_mb_search('release', query=query, fields=fields,
                         limit=limit, offset=offset)
def release_group_search(query='', limit=None, offset=None, **fields):
    """Search for release groups. Give a free-form `query` string
    and/or field queries as keyword arguments. Accepted fields:
    rgid, releasegroup, reid, release, arid, artist, artistname,
    creditname, type, tag
    """
    return _do_mb_search('release-group', query=query, fields=fields,
                         limit=limit, offset=offset)
def work_search(query='', limit=None, offset=None, **fields):
    """Search for works. Give a free-form `query` string and/or field
    queries as keyword arguments. Accepted fields:
    wid, work, iswc, type, arid, artist, alias, tag
    """
    return _do_mb_search('work', query=query, fields=fields,
                         limit=limit, offset=offset)
# Lists of entities
def get_releases_by_discid(id, includes=[], release_type=[]):
    """Query the "discid" endpoint for `id`, optionally filtered by
    release type. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes,
                                            release_type=release_type)
    return _do_mb_query("discid", id, includes, filters)
def get_recordings_by_echoprint(echoprint, includes=[], release_status=[], release_type=[]):
    """Query the "echoprint" endpoint for `echoprint`, with optional
    includes and release status/type filters. Returns the parsed
    response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("echoprint", echoprint, includes, filters)
def get_recordings_by_puid(puid, includes=[], release_status=[], release_type=[]):
    """Query the "puid" endpoint for `puid`, with optional includes
    and release status/type filters. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("puid", puid, includes, filters)
def get_recordings_by_isrc(isrc, includes=[], release_status=[], release_type=[]):
    """Query the "isrc" endpoint for `isrc`, with optional includes
    and release status/type filters. Returns the parsed response.
    """
    filters = _check_filter_and_make_params(includes, release_status,
                                            release_type)
    return _do_mb_query("isrc", isrc, includes, filters)
def get_works_by_iswc(iswc, includes=[]):
    """Query the "iswc" endpoint for `iswc` with optional includes.
    Returns the parsed response.
    """
    return _do_mb_query("iswc", iswc, includes=includes)
# Browse methods
# Browse includes are a subset of the regular get includes, so we check them
# here; the check in _do_mb_query will then pass anyway.
def browse_artist(recording=None, release=None, release_group=None, includes=[], limit=None, offset=None):
    """Browse artists linked to exactly one of `recording`, `release`
    or `release_group`.

    Raises Exception when more than one criterion is supplied.
    `limit` and `offset` are forwarded when truthy.
    """
    # optional parameter work?
    _check_includes_impl(includes, ["aliases", "tags", "ratings", "user-tags", "user-ratings"])
    criteria = (("recording", recording),
                ("release", release),
                ("release-group", release_group))
    #if work: p["work"] = work
    p = dict((key, value) for key, value in criteria if value)
    if len(p) > 1:
        raise Exception("Can't have more than one of recording, release, release_group, work")
    if limit:
        p["limit"] = limit
    if offset:
        p["offset"] = offset
    return _do_mb_query("artist", "", includes, p)
def browse_label(release=None, includes=[], limit=None, offset=None):
    """Browse labels linked to `release`.

    `limit` and `offset` are forwarded when truthy. Returns the parsed
    response.
    """
    _check_includes_impl(includes, ["aliases", "tags", "ratings", "user-tags", "user-ratings"])
    p = {}
    # Only send the criterion when it was actually supplied, like the
    # other browse_* functions; an unconditional entry would put the
    # literal text "None" into the query string.
    if release:
        p["release"] = release
    if limit:
        p["limit"] = limit
    if offset:
        p["offset"] = offset
    return _do_mb_query("label", "", includes, p)
def browse_recording(artist=None, release=None, includes=[], limit=None, offset=None):
    """Browse recordings linked to exactly one of `artist` or
    `release`.

    Raises Exception when both criteria are supplied. `limit` and
    `offset` are forwarded when truthy.
    """
    _check_includes_impl(includes, ["artist-credits", "tags", "ratings", "user-tags", "user-ratings"])
    criteria = (("artist", artist), ("release", release))
    p = dict((key, value) for key, value in criteria if value)
    if len(p) > 1:
        raise Exception("Can't have more than one of artist, release")
    if limit:
        p["limit"] = limit
    if offset:
        p["offset"] = offset
    return _do_mb_query("recording", "", includes, p)
def browse_release(artist=None, label=None, recording=None, release_group=None, release_status=[], release_type=[], includes=[], limit=None, offset=None):
    """Browse releases linked to exactly one of `artist`, `label`,
    `recording` or `release_group`.

    At least one of `release_status` / `release_type` must be
    non-empty. Raises Exception when more than one criterion is given
    and InvalidFilterError when both filters are empty.
    """
    # track_artist param doesn't work yet
    _check_includes_impl(includes, ["artist-credits", "labels", "recordings"])
    criteria = (("artist", artist),
                ("label", label),
                ("recording", recording),
                ("release-group", release_group))
    #if track_artist: p["track_artist"] = track_artist
    p = dict((key, value) for key, value in criteria if value)
    if len(p) > 1:
        raise Exception("Can't have more than one of artist, label, recording, release_group")
    if limit:
        p["limit"] = limit
    if offset:
        p["offset"] = offset
    # Filter validation runs before the emptiness check, so an invalid
    # filter value is reported first.
    p.update(_check_filter_and_make_params("releases", release_status, release_type))
    if not len(release_status) and not len(release_type):
        raise InvalidFilterError("Need at least one release status or type")
    return _do_mb_query("release", "", includes, p)
def browse_release_group(artist=None, release=None, release_type=[], includes=[], limit=None, offset=None):
    """Browse release groups linked to exactly one of `artist` or
    `release`.

    `release_type` must be non-empty. Raises Exception when both
    criteria are given and InvalidFilterError when no release type is
    supplied.
    """
    _check_includes_impl(includes, ["artist-credits", "tags", "ratings", "user-tags", "user-ratings"])
    criteria = (("artist", artist), ("release", release))
    p = dict((key, value) for key, value in criteria if value)
    if len(p) > 1:
        raise Exception("Can't have more than one of artist, release")
    if limit:
        p["limit"] = limit
    if offset:
        p["offset"] = offset
    p.update(_check_filter_and_make_params("release-groups", [], release_type))
    if not len(release_type):
        raise InvalidFilterError("Need at least one release type")
    return _do_mb_query("release-group", "", includes, p)
# browse_work is defined in the docs but has no browse criteria
# Collections
def get_all_collections():
    """Query the "collection" endpoint with an empty id and return the
    parsed response.
    """
    # Missing <release-list count="n"> the count in the reply
    return _do_mb_query("collection", id='')
def get_releases_in_collection(collection):
    """Query "collection/<collection>/releases" and return the parsed
    response.
    """
    releases_path = "%s/releases" % collection
    return _do_mb_query("collection", releases_path)
# Submission methods
def submit_barcodes(barcodes):
    """
    Submit barcodes given as a mapping {release1: barcode1,
    release2: barcode2}.
    Must call auth(user, pass) first
    """
    payload = mbxml.make_barcode_request(barcodes)
    return _do_mb_post("release", payload)
def submit_puids(puids):
    """Build a PUID submission document from `puids` and post it to
    the "recording" endpoint.
    """
    payload = mbxml.make_puid_request(puids)
    return _do_mb_post("recording", payload)
def submit_echoprints(echoprints):
    """Build an echoprint submission document from `echoprints` and
    post it to the "recording" endpoint.
    """
    payload = mbxml.make_echoprint_request(echoprints)
    return _do_mb_post("recording", payload)
def submit_isrcs(isrcs):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError
def submit_tags(artist_tags={}, recording_tags={}):
    """ Submit user tags.
        Both arguments are mappings of the form:
        {'entityid': [taglist]}
    """
    payload = mbxml.make_tag_request(artist_tags, recording_tags)
    return _do_mb_post("tag", payload)
def submit_ratings(artist_ratings={}, recording_ratings={}):
    """ Submit user ratings.
        Both arguments are mappings of the form:
        {'entityid': rating}
    """
    payload = mbxml.make_rating_request(artist_ratings, recording_ratings)
    return _do_mb_post("rating", payload)
def add_releases_to_collection(collection, releases=[]):
    """PUT the given release ids, joined with ";", into `collection`.
    Nothing is returned.
    """
    # XXX: Maximum URI length of 16kb means we should only allow ~400 releases
    target = "collection/%s/releases/%s" % (collection, ";".join(releases))
    _do_mb_put(target)
def remove_releases_from_collection(collection, releases=[]):
    """DELETE the given release ids, joined with ";", from
    `collection`. Nothing is returned.
    """
    target = "collection/%s/releases/%s" % (collection, ";".join(releases))
    _do_mb_delete(target)
+545
View File
@@ -0,0 +1,545 @@
import xml.etree.ElementTree as ET
import string
import StringIO
import logging
try:
    from ET import fixtag
except ImportError:
    # No importable `fixtag` is available this way, so define an
    # equivalent helper locally.
    def fixtag(tag, namespaces):
        """Turn a decorated tag of the form {uri}tag into a prefixed
        tag.

        `namespaces` maps namespace URIs to prefixes; an unknown URI is
        assigned a fresh "nsN" prefix and recorded in the mapping.
        Returns a pair (prefixed_tag, xmlns) where xmlns is an
        ("xmlns:prefix", uri) declaration for a newly assigned prefix
        and None otherwise.
        """
        if isinstance(tag, ET.QName):
            tag = tag.text
        # str.split instead of the Python-2-only string.split function.
        namespace_uri, tag = tag[1:].split("}", 1)
        prefix = namespaces.get(namespace_uri)
        if prefix is None:
            prefix = "ns%d" % len(namespaces)
            namespaces[namespace_uri] = prefix
            if prefix == "xml":
                xmlns = None
            else:
                xmlns = ("xmlns:%s" % prefix, namespace_uri)
        else:
            xmlns = None
        return "%s:%s" % (prefix, tag), xmlns
NS_MAP = {"http://musicbrainz.org/ns/mmd-2.0#": "ws2"}
def make_artist_credit(artists):
    """Flatten an artist-credit list into one string.

    Dict entries contribute their nested artist name (empty when
    absent); every other entry is used as-is, e.g. a join phrase.
    """
    return "".join(
        entry.get("artist", {}).get("name", "")
        if isinstance(entry, dict) else entry
        for entry in artists
    )
def parse_elements(valid_els, element):
    """ Collect the text of direct children of an element.
        For example, given the element:
        <element>
            <subelement>Text</subelement>
        </element>
        and a list valid_els that contains "subelement",
        return a dict {'subelement': 'Text'}

        Children whose name is not in valid_els are reported through
        logging.debug and left out.
    """
    collected = {}
    for child in element:
        name = fixtag(child.tag, NS_MAP)[0]
        # Drop the namespace prefix, if any.
        if ":" in name:
            name = name.split(":")[1]
        if name not in valid_els:
            logging.debug("in <%s>, uncaught <%s>", fixtag(element.tag, NS_MAP)[0], name)
            continue
        collected[name] = child.text
    return collected
def parse_attributes(attributes, element):
    """ Collect selected attributes of an element.
        For example, given the element:
        <element type="Group" />
        and a list attributes that contains "type",
        return a dict {'type': 'Group'}

        A requested attribute that the element lacks is reported
        through logging.debug and left out.
    """
    collected = {}
    present = element.attrib
    for wanted in attributes:
        if wanted not in present:
            logging.debug("in <%s>, uncaught attribute %s", fixtag(element.tag, NS_MAP)[0], wanted)
            continue
        collected[wanted] = present[wanted]
    return collected
def parse_inner(inner_els, element):
    """ Hand each known child element to its own parser.
        For example, given the element:
        <element>
            <subelement>
                <a>Foo</a><b>Bar</b>
            </subelement>
        </element>
        and a dictionary {'subelement': parse_subelement},
        call parse_subelement(<subelement>) and
        return a dict {'subelement': <result>}
        if parse_subelement returns a tuple of the form
        ('subelement-key', <result>) then return a dict
        {'subelement-key': <result>} instead

        Children without a registered parser are reported through
        logging.debug and skipped.
    """
    collected = {}
    for child in element:
        name = fixtag(child.tag, NS_MAP)[0]
        # Drop the namespace prefix, if any.
        if ":" in name:
            name = name.split(":")[1]
        if name not in inner_els:
            logging.debug("in <%s>, not delegating <%s>", fixtag(element.tag, NS_MAP)[0], name)
            continue
        parsed = inner_els[name](child)
        if isinstance(parsed, tuple):
            # The parser chose its own key: (key, value).
            collected[parsed[0]] = parsed[1]
        else:
            collected[name] = parsed
    return collected
def parse_message(message):
    """Read the whole file-like `message`, parse it as XML and return
    a dict with one entry per recognised child of the root element.
    """
    document = StringIO.StringIO(message.read())
    root = ET.ElementTree(file=document).getroot()

    # Parser for each top-level element this module understands.
    handlers = {
        "artist": parse_artist,
        "label": parse_label,
        "release": parse_release,
        "release-group": parse_release_group,
        "recording": parse_recording,
        "work": parse_work,
        "disc": parse_disc,
        "puid": parse_puid,
        "echoprint": parse_puid,
        "artist-list": parse_artist_list,
        "label-list": parse_label_list,
        "release-list": parse_release_list,
        "release-group-list": parse_release_group_list,
        "recording-list": parse_recording_list,
        "work-list": parse_work_list,
        "collection-list": parse_collection_list,
        "collection": parse_collection,
        "message": parse_response_message,
    }
    return dict(parse_inner(handlers, root))
def parse_response_message(message):
    """Return the "text" child of a message element as a dict."""
    wanted = ["text"]
    return parse_elements(wanted, message)
def parse_collection_list(cl):
    """Parse every child of `cl` as a collection; returns a list."""
    collections = []
    for child in cl:
        collections.append(parse_collection(child))
    return collections
def parse_collection(collection):
    """Parse a collection element: its id attribute, its name and
    editor children, and any nested release list.
    """
    parsed = parse_attributes(["id"], collection)
    parsed.update(parse_elements(["name", "editor"], collection))
    parsed.update(parse_inner({"release-list": parse_release_list}, collection))
    return parsed
def parse_collection_release_list(rl):
    """Return only the "count" attribute of a release-list element,
    as a dict.
    """
    return parse_attributes(["count"], rl)
def parse_artist_lifespan(lifespan):
    """Return the (begin, end) texts of a life-span element as a
    tuple; a missing part is the empty string.
    """
    parts = parse_elements(["begin", "end"], lifespan)
    return (parts.get("begin", ""), parts.get("end", ""))
def parse_artist_list(al):
    """Parse every child of `al` as an artist; returns a list."""
    artists = []
    for child in al:
        artists.append(parse_artist(child))
    return artists
def parse_artist(artist):
    """Parse an artist element into a dict of its id/type attributes,
    its simple text children and its nested lists.

    The life span is stored under the "life-span" key as a
    (begin, end) tuple.
    """
    result = {}
    attribs = ["id", "type"]
    elements = ["name", "sort-name", "country", "user-rating"]
    inner_els = {
        # parse_inner interprets any tuple result as (key, value), so
        # the bare (begin, end) pair would be filed under the begin
        # date. Wrap it to store it under an explicit key instead.
        "life-span": lambda el: ("life-span", parse_artist_lifespan(el)),
        "recording-list": parse_recording_list,
        "release-list": parse_release_list,
        "release-group-list": parse_release_group_list,
        "work-list": parse_work_list,
        "tag-list": parse_tag_list,
        "user-tag-list": parse_tag_list,
        "rating": parse_rating,
        "alias-list": parse_alias_list,
    }

    result.update(parse_attributes(attribs, artist))
    result.update(parse_elements(elements, artist))
    result.update(parse_inner(inner_els, artist))

    return result
def parse_label_list(ll):
    """Parse every child of `ll` as a label; returns a list."""
    labels = []
    for child in ll:
        labels.append(parse_label(child))
    return labels
def parse_label(label):
    """Parse a label element into a dict of its id/type attributes,
    its simple text children and its nested lists.

    The life span is stored under the "life-span" key as a
    (begin, end) tuple.
    """
    result = {}
    attribs = ["id", "type"]
    elements = ["name", "sort-name", "country", "label-code", "user-rating"]
    inner_els = {
        # parse_inner interprets any tuple result as (key, value), so
        # the bare (begin, end) pair would be filed under the begin
        # date. Wrap it to store it under an explicit key instead.
        "life-span": lambda el: ("life-span", parse_artist_lifespan(el)),
        "release-list": parse_release_list,
        "tag-list": parse_tag_list,
        "user-tag-list": parse_tag_list,
        "rating": parse_rating,
        "alias-list": parse_alias_list,
    }

    result.update(parse_attributes(attribs, label))
    result.update(parse_elements(elements, label))
    result.update(parse_inner(inner_els, label))

    return result
def parse_attribute_list(al):
    """Return the text of every child of `al`, in order, as a list."""
    texts = []
    for child in al:
        texts.append(parse_attribute_tag(child))
    return texts
def parse_attribute_tag(attribute):
    """Return the text content of an attribute element."""
    text = attribute.text
    return text
def parse_relation_list(rl):
    """Parse a relation-list element.

    Returns a (key, relations) tuple where key is
    "<target-type>-relation-list", built from the element's
    target-type attribute, and relations is the list of parsed
    children. Raises KeyError if the attribute is missing.
    """
    target = parse_attributes(["target-type"], rl)["target-type"]
    relations = [parse_relation(child) for child in rl]
    return ("%s-relation-list" % target, relations)
def parse_relation(relation):
    """Parse a relation element: its type attribute, its target and
    direction children, and whichever related entity it embeds.
    """
    nested = {
        "artist": parse_artist,
        "label": parse_label,
        "recording": parse_recording,
        "release": parse_release,
        "release-group": parse_release_group,
        "attribute-list": parse_attribute_list,
        "work": parse_work,
    }
    parsed = parse_attributes(["type"], relation)
    parsed.update(parse_elements(["target", "direction"], relation))
    parsed.update(parse_inner(nested, relation))
    return parsed
def parse_release(release):
    """Parse a release element into a dict.

    When an artist credit is present, its flattened form is added
    under "artist-credit-phrase".
    """
    simple = ["title", "status", "disambiguation", "quality", "country", "barcode", "date", "packaging", "asin"]
    nested = {
        "text-representation": parse_text_representation,
        "artist-credit": parse_artist_credit,
        "label-info-list": parse_label_info_list,
        "medium-list": parse_medium_list,
        "release-group": parse_release_group,
        "relation-list": parse_relation_list,
    }
    parsed = parse_attributes(["id"], release)
    parsed.update(parse_elements(simple, release))
    parsed.update(parse_inner(nested, release))
    if "artist-credit" in parsed:
        parsed["artist-credit-phrase"] = make_artist_credit(parsed["artist-credit"])
    return parsed
def parse_medium_list(ml):
    """Parse every child of `ml` as a medium; returns a list."""
    media = []
    for child in ml:
        media.append(parse_medium(child))
    return media
def parse_medium(medium):
    """Parse a medium element: its position, format and title
    children plus its disc and track lists.
    """
    nested = {"disc-list": parse_disc_list,
              "track-list": parse_track_list}
    parsed = {}
    parsed.update(parse_elements(["position", "format", "title"], medium))
    parsed.update(parse_inner(nested, medium))
    return parsed
def parse_disc_list(dl):
    """Parse every child of `dl` as a disc; returns a list."""
    discs = []
    for child in dl:
        discs.append(parse_disc(child))
    return discs
def parse_text_representation(textr):
    """Return the language and script children of a
    text-representation element as a dict.
    """
    wanted = ["language", "script"]
    return parse_elements(wanted, textr)
def parse_release_group(rg):
    """Parse a release-group element into a dict.

    When an artist credit is present, its flattened form is added
    under "artist-credit-phrase".
    """
    nested = {
        "artist-credit": parse_artist_credit,
        "release-list": parse_release_list,
        "tag-list": parse_tag_list,
        "user-tag-list": parse_tag_list,
        "rating": parse_rating,
    }
    parsed = parse_attributes(["id", "type"], rg)
    parsed.update(parse_elements(["title", "user-rating", "first-release-date"], rg))
    parsed.update(parse_inner(nested, rg))
    if "artist-credit" in parsed:
        parsed["artist-credit-phrase"] = make_artist_credit(parsed["artist-credit"])
    return parsed
def parse_recording(recording):
    """Parse a recording element into a dict, including its tag,
    rating and external-id lists.

    When an artist credit is present, its flattened form is added
    under "artist-credit-phrase".
    """
    nested = {
        "artist-credit": parse_artist_credit,
        "release-list": parse_release_list,
        "tag-list": parse_tag_list,
        "user-tag-list": parse_tag_list,
        "rating": parse_rating,
        "puid-list": parse_external_id_list,
        "isrc-list": parse_external_id_list,
        "echoprint-list": parse_external_id_list,
    }
    parsed = parse_attributes(["id"], recording)
    parsed.update(parse_elements(["title", "length", "user-rating"], recording))
    parsed.update(parse_inner(nested, recording))
    if "artist-credit" in parsed:
        parsed["artist-credit-phrase"] = make_artist_credit(parsed["artist-credit"])
    return parsed
def parse_external_id_list(pl):
    """Return the id attribute of every child of `pl` as a list.
    Raises KeyError when a child has no id.
    """
    ids = []
    for child in pl:
        ids.append(parse_attributes(["id"], child)["id"])
    return ids
def parse_work_list(wl):
    """Parse every child of `wl` as a work; returns a list."""
    return [parse_work(child) for child in wl]
def parse_work(work):
    """Parse a work element: its id, its title and user rating, and
    its tag, rating and alias data.
    """
    nested = {
        "tag-list": parse_tag_list,
        "user-tag-list": parse_tag_list,
        "rating": parse_rating,
        "alias-list": parse_alias_list,
    }
    parsed = parse_attributes(["id"], work)
    parsed.update(parse_elements(["title", "user-rating"], work))
    parsed.update(parse_inner(nested, work))
    return parsed
def parse_disc(disc):
    """Parse a disc element: its id, its sectors child and any nested
    release list.
    """
    parsed = parse_attributes(["id"], disc)
    parsed.update(parse_elements(["sectors"], disc))
    parsed.update(parse_inner({"release-list": parse_release_list}, disc))
    return parsed
def parse_release_list(rl):
    """Parse every child of `rl` as a release; returns a list."""
    return [parse_release(child) for child in rl]
def parse_release_group_list(rgl):
    """Parse every child of `rgl` as a release group; returns a list."""
    return [parse_release_group(child) for child in rgl]
def parse_puid(puid):
    """Parse a puid element: its id attribute and any nested
    recording list.
    """
    parsed = parse_attributes(["id"], puid)
    parsed.update(parse_inner({"recording-list": parse_recording_list}, puid))
    return parsed
def parse_recording_list(recs):
    """Parse every child of `recs` as a recording; returns a list."""
    return [parse_recording(child) for child in recs]
def parse_artist_credit(ac):
    """Parse an artist-credit element into a flat list.

    Each name credit contributes its parsed dict, followed by its
    joinphrase attribute (a string) when it has one.
    """
    credit = []
    for name_credit in ac:
        credit.append(parse_name_credit(name_credit))
        phrase = parse_attributes(["joinphrase"], name_credit)
        if "joinphrase" in phrase:
            credit.append(phrase["joinphrase"])
    return credit
def parse_name_credit(nc):
    """Parse a name-credit element: its name child and the nested
    artist.
    """
    parsed = {}
    parsed.update(parse_elements(["name"], nc))
    parsed.update(parse_inner({"artist": parse_artist}, nc))
    return parsed
def parse_label_info_list(lil):
    """Parse every child of `lil` as a label-info; returns a list."""
    return [parse_label_info(child) for child in lil]
def parse_label_info(li):
    """Parse a label-info element: its catalog-number child and the
    nested label.
    """
    parsed = {}
    parsed.update(parse_elements(["catalog-number"], li))
    parsed.update(parse_inner({"label": parse_label}, li))
    return parsed
def parse_track_list(tl):
    """Parse every child of `tl` as a track; returns a list."""
    return [parse_track(child) for child in tl]
def parse_track(track):
    """Parse a track element: its position child and the nested
    recording.
    """
    parsed = {}
    parsed.update(parse_elements(["position"], track))
    parsed.update(parse_inner({"recording": parse_recording}, track))
    return parsed
def parse_tag_list(tl):
    """Parse every child of `tl` as a tag; returns a list."""
    return [parse_tag(child) for child in tl]
def parse_tag(tag):
    """Parse a tag element: its count attribute and its name child."""
    parsed = {}
    parsed.update(parse_attributes(["count"], tag))
    parsed.update(parse_elements(["name"], tag))
    return parsed
def parse_rating(rating):
    """Parse a rating element: its votes-count attribute plus its
    text, stored under "rating".
    """
    parsed = {}
    parsed.update(parse_attributes(["votes-count"], rating))
    parsed["rating"] = rating.text
    return parsed
def parse_alias_list(al):
    """Return the text of every child of `al`, in order, as a list."""
    return [alias.text for alias in al]
###
def make_barcode_request(barcodes):
    """Serialize a {release_id: barcode} mapping as a namespaced
    metadata document and return it encoded as UTF-8.
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    qualify = lambda local: "{%s}%s" % (NS, local)

    root = ET.Element(qualify("metadata"))
    release_list = ET.SubElement(root, qualify("release-list"))
    for release_id, barcode in barcodes.items():
        release_el = ET.SubElement(release_list, qualify("release"))
        release_el.set(qualify("id"), release_id)
        ET.SubElement(release_el, qualify("barcode")).text = barcode

    return ET.tostring(root, "utf-8")
def make_puid_request(puids):
    """Serialize a {recording_id: puid-or-list-of-puids} mapping as a
    namespaced metadata document and return it encoded as UTF-8.
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    qualify = lambda local: "{%s}%s" % (NS, local)

    root = ET.Element(qualify("metadata"))
    recording_list = ET.SubElement(root, qualify("recording-list"))
    for recording_id, value in puids.items():
        recording_el = ET.SubElement(recording_list, qualify("recording"))
        recording_el.set("id", recording_id)
        puid_list_el = ET.SubElement(recording_el, qualify("puid-list"))
        # A single PUID may be given bare instead of in a list.
        if not isinstance(value, list):
            value = [value]
        for puid in value:
            ET.SubElement(puid_list_el, qualify("puid")).set("id", puid)

    return ET.tostring(root, "utf-8")
def make_echoprint_request(echoprints):
    """Serialize a {recording_id: echoprint-or-list-of-echoprints}
    mapping as a namespaced metadata document and return it encoded as
    UTF-8.
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    qualify = lambda local: "{%s}%s" % (NS, local)

    root = ET.Element(qualify("metadata"))
    recording_list = ET.SubElement(root, qualify("recording-list"))
    for recording_id, value in echoprints.items():
        recording_el = ET.SubElement(recording_list, qualify("recording"))
        recording_el.set("id", recording_id)
        print_list_el = ET.SubElement(recording_el, qualify("echoprint-list"))
        # A single echoprint may be given bare instead of in a list.
        if not isinstance(value, list):
            value = [value]
        for echoprint in value:
            ET.SubElement(print_list_el, qualify("echoprint")).set("id", echoprint)

    return ET.tostring(root, "utf-8")
def make_tag_request(artist_tags, recording_tags):
    """Serialize user tags as a namespaced metadata document and
    return it encoded as UTF-8.

    Both arguments map an entity id to an iterable of tag names. The
    recording list is emitted before the artist list.
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    qualify = lambda local: "{%s}%s" % (NS, local)

    root = ET.Element(qualify("metadata"))
    # (list element name, entity element name, id -> tags mapping)
    sections = (
        ("recording-list", "recording", recording_tags),
        ("artist-list", "artist", artist_tags),
    )
    for list_name, entity_name, tag_map in sections:
        list_el = ET.SubElement(root, qualify(list_name))
        for entity_id, tags in tag_map.items():
            entity_el = ET.SubElement(list_el, qualify(entity_name))
            entity_el.set(qualify("id"), entity_id)
            tag_list_el = ET.SubElement(entity_el, qualify("user-tag-list"))
            for tag in tags:
                user_tag_el = ET.SubElement(tag_list_el, qualify("user-tag"))
                ET.SubElement(user_tag_el, qualify("name")).text = tag

    return ET.tostring(root, "utf-8")
def make_rating_request(artist_ratings, recording_ratings):
    """Serialize user ratings as a namespaced metadata document and
    return it encoded as UTF-8.

    Both arguments map an entity id to a rating; integer ratings are
    converted to their decimal string form. The recording list is
    emitted before the artist list.
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    root = ET.Element("{%s}metadata" % NS)
    rec_list = ET.SubElement(root, "{%s}recording-list" % NS)
    for rec, rating in recording_ratings.items():
        rec_xml = ET.SubElement(rec_list, "{%s}recording" % NS)
        rec_xml.set("{%s}id" % NS, rec)
        rating_xml = ET.SubElement(rec_xml, "{%s}user-rating" % NS)
        if isinstance(rating, int):
            rating = "%d" % rating
        rating_xml.text = rating
    art_list = ET.SubElement(root, "{%s}artist-list" % NS)
    for art, rating in artist_ratings.items():
        art_xml = ET.SubElement(art_list, "{%s}artist" % NS)
        art_xml.set("{%s}id" % NS, art)
        # Attach the rating to the artist element itself. Using the
        # recording loop's variable here would fail with NameError when
        # there are no recordings and misplace the rating otherwise.
        rating_xml = ET.SubElement(art_xml, "{%s}user-rating" % NS)
        if isinstance(rating, int):
            rating = "%d" % rating
        rating_xml.text = rating

    return ET.tostring(root, "utf-8")
+225 -65
View File
@@ -19,13 +19,14 @@ from __future__ import with_statement # Python 2.5
import os
import logging
import pickle
from collections import defaultdict
from lib.beets import autotag
from lib.beets import library
import lib.beets.autotag.art as beets.autotag.art
import lib.beets.autotag.art
from lib.beets import plugins
from lib.beets.util import pipeline
from lib.beets.util import syspath, normpath
from lib.beets.util import syspath, normpath, plurality
from lib.beets.util.enumeration import enum
action = enum(
@@ -35,6 +36,8 @@ action = enum(
QUEUE_SIZE = 128
STATE_FILE = os.path.expanduser('~/.beetsstate')
SINGLE_ARTIST_THRESH = 0.25
VARIOUS_ARTISTS = u'Various Artists'
# Global logger.
log = logging.getLogger('beets')
@@ -78,12 +81,21 @@ def _reopen_lib(lib):
else:
return lib
def _duplicate_check(lib, artist, album, recent=None):
def _duplicate_check(lib, task, recent=None):
"""Check whether an album already exists in the library. `recent`
should be a set of (artist, album) pairs that will be built up
with every call to this function and checked along with the
library.
"""
if task.choice_flag is action.ASIS:
artist = task.cur_artist
album = task.cur_album
elif task.choice_flag is action.APPLY:
artist = task.info.artist
album = task.info.album
else:
return False
if artist is None:
# As-is import with no artist. Skip check.
return False
@@ -95,14 +107,29 @@ def _duplicate_check(lib, artist, album, recent=None):
recent.add((artist, album))
# Look in the library.
cur_paths = set(i.path for i in task.items)
for album_cand in lib.albums(artist=artist):
if album_cand.album == album:
# Check whether the album is identical in contents, in which
# case it is not a duplicate (will be replaced).
other_paths = set(i.path for i in album_cand.items())
if other_paths == cur_paths:
continue
return True
return False
def _item_duplicate_check(lib, artist, title, recent=None):
def _item_duplicate_check(lib, task, recent=None):
"""Check whether an item already exists in the library."""
if task.choice_flag is action.ASIS:
artist = task.item.artist
title = task.item.title
elif task.choice_flag is action.APPLY:
artist = task.info.artist
title = task.info.title
else:
return False
# Try recent items.
if recent is not None:
if (artist, title) in recent:
@@ -112,13 +139,66 @@ def _item_duplicate_check(lib, artist, title, recent=None):
# Check the library.
item_iter = lib.items(artist=artist, title=title)
try:
item_iter.next()
except StopIteration:
return False
for other_item in item_iter:
# Existing items not considered duplicates.
if other_item.path == task.item.path:
continue
return True
finally:
item_iter.close()
return False
def _infer_album_fields(task):
"""Given an album and an associated import task, massage the
album-level metadata. This ensures that the album artist is set
and that the "compilation" flag is set automatically.
"""
assert task.is_album
assert task.items
changes = {}
if task.choice_flag == action.ASIS:
# Taking metadata "as-is". Guess whether this album is VA.
plur_artist, freq = plurality([i.artist for i in task.items])
if freq == len(task.items) or (freq > 1 and
float(freq) / len(task.items) >= SINGLE_ARTIST_THRESH):
# Single-artist album.
changes['albumartist'] = plur_artist
changes['comp'] = False
else:
# VA.
changes['albumartist'] = VARIOUS_ARTISTS
changes['comp'] = True
elif task.choice_flag == action.APPLY:
# Applying autotagged metadata. Just get AA from the first
# item.
if not task.items[0].albumartist:
changes['albumartist'] = task.items[0].artist
if not task.items[0].mb_albumartistid:
changes['mb_albumartistid'] = task.items[0].mb_artistid
else:
assert False
# Apply new metadata.
for item in task.items:
for k, v in changes.iteritems():
setattr(item, k, v)
def _open_state():
"""Reads the state file, returning a dictionary."""
try:
with open(STATE_FILE) as f:
return pickle.load(f)
except IOError:
return {}
def _save_state(state):
"""Writes the state dictionary out to disk."""
with open(STATE_FILE, 'w') as f:
pickle.dump(state, f)
return True
# Utilities for reading and writing the beets progress file, which
# allows long tagging tasks to be resumed when they pause (or crash).
@@ -128,11 +208,9 @@ def progress_set(toppath, path):
`path`. If path is None, then clear the progress value (indicating
that the tagging completed).
"""
try:
with open(STATE_FILE) as f:
state = pickle.load(f)
except IOError:
state = {PROGRESS_KEY: {}}
state = _open_state()
if PROGRESS_KEY not in state:
state[PROGRESS_KEY] = {}
if path is None:
# Remove progress from file.
@@ -141,20 +219,41 @@ def progress_set(toppath, path):
else:
state[PROGRESS_KEY][toppath] = path
with open(STATE_FILE, 'w') as f:
pickle.dump(state, f)
_save_state(state)
def progress_get(toppath):
"""Get the last successfully tagged subpath of toppath. If toppath
has no progress information, returns None.
"""
try:
with open(STATE_FILE) as f:
state = pickle.load(f)
except IOError:
state = _open_state()
if PROGRESS_KEY not in state:
return None
return state[PROGRESS_KEY].get(toppath)
# Similarly, utilities for manipulating the "incremental" import log.
# This keeps track of all directories that were ever imported, which
# allows the importer to only import new stuff.
HISTORY_KEY = 'taghistory'
def history_add(path):
"""Indicate that the import of `path` is completed and should not
be repeated in incremental imports.
"""
state = _open_state()
if HISTORY_KEY not in state:
state[HISTORY_KEY] = set()
state[HISTORY_KEY].add(path)
_save_state(state)
def history_get():
"""Get the set of completed paths in incremental imports.
"""
state = _open_state()
if HISTORY_KEY not in state:
return set()
return state[HISTORY_KEY]
# The configuration structure.
class ImportConfig(object):
@@ -165,7 +264,8 @@ class ImportConfig(object):
_fields = ['lib', 'paths', 'resume', 'logfile', 'color', 'quiet',
'quiet_fallback', 'copy', 'write', 'art', 'delete',
'choose_match_func', 'should_resume_func', 'threaded',
'autot', 'singletons', 'timid', 'choose_item_func']
'autot', 'singletons', 'timid', 'choose_item_func',
'query', 'incremental']
def __init__(self, **kwargs):
for slot in self._fields:
setattr(self, slot, kwargs[slot])
@@ -174,6 +274,17 @@ class ImportConfig(object):
if self.paths:
self.paths = map(normpath, self.paths)
# Incremental and progress are mutually exclusive.
if self.incremental:
self.resume = False
# When based on a query instead of directories, never
# save progress or try to resume.
if self.query is not None:
self.paths = None
self.resume = False
self.incremental = False
# The importer task class.
@@ -277,6 +388,12 @@ class ImportTask(object):
# album task, which implies the same.
progress_set(self.toppath, self.path)
def save_history(self):
"""Save the directory in the history for incremental imports.
"""
if self.sentinel or self.is_album:
history_add(self.path)
# Logical decisions.
def should_write_tags(self):
"""Should new info be written to the files' metadata?"""
@@ -289,19 +406,6 @@ class ImportTask(object):
def should_fetch_art(self):
"""Should album art be downloaded for this album?"""
return self.should_write_tags() and self.is_album
def should_infer_aa(self):
"""When creating an album structure, should the album artist
field be inferred from the plurality of track artists?
"""
assert self.is_album
if self.choice_flag == action.APPLY:
# Album artist comes from the info dictionary.
return False
elif self.choice_flag == action.ASIS:
# As-is imports likely don't have an album artist.
return True
else:
assert False
def should_skip(self):
"""After a choice has been made, returns True if this is a
sentinel or it has been marked for skipping.
@@ -336,6 +440,10 @@ def read_tasks(config):
else:
# Clear progress; we're starting from the top.
progress_set(path, None)
# Look for saved incremental directories.
if config.incremental:
history_dirs = history_get()
for toppath in config.paths:
# Check whether the path is to a file.
@@ -348,6 +456,7 @@ def read_tasks(config):
if progress:
resume_dir = resume_dirs.get(toppath)
for path, items in autotag.albums_in_dir(toppath):
# Skip according to progress.
if progress and resume_dir:
# We're fast-forwarding to resume a previous tagging.
if path == resume_dir:
@@ -356,6 +465,10 @@ def read_tasks(config):
resume_dir = None
continue
# When incremental, skip paths in the history.
if config.incremental and path in history_dirs:
continue
# Yield all the necessary tasks.
if config.singletons:
for item in items:
@@ -367,6 +480,28 @@ def read_tasks(config):
# Indicate the directory is finished.
yield ImportTask.done_sentinel(toppath)
def query_tasks(config):
"""A generator that works as a drop-in-replacement for read_tasks.
Instead of finding files from the filesystem, a query is used to
match items from the library.
"""
lib = _reopen_lib(config.lib)
if config.singletons:
# Search for items.
items = list(lib.items(config.query))
for item in items:
yield ImportTask.item_task(item)
else:
# Search for albums.
albums = lib.albums(config.query)
for album in albums:
log.debug('yielding album %i: %s - %s' %
(album.id, album.albumartist, album.album))
items = list(album.items())
yield ImportTask(None, album.item_dir(), items)
def initial_lookup(config):
"""A coroutine for performing the initial MusicBrainz lookup for an
album. It accepts lists of Items and yields
@@ -420,19 +555,13 @@ def user_query(config):
item_query(config), collector()))
ipl.run_sequential()
task = pipeline.multiple(item_tasks)
continue
# Check for duplicates if we have a match (or ASIS).
if choice is action.ASIS or isinstance(choice, tuple):
if choice is action.ASIS:
artist = task.cur_artist
album = task.cur_album
else:
artist = task.info['artist']
album = task.info['album']
if _duplicate_check(lib, artist, album, recent):
tag_log(config.logfile, 'duplicate', task.path)
log.warn("This album is already in the library!")
task.set_choice(action.SKIP)
if _duplicate_check(lib, task, recent):
tag_log(config.logfile, 'duplicate', task.path)
log.warn("This album is already in the library!")
task.set_choice(action.SKIP)
def show_progress(config):
"""This stage replaces the initial_lookup and user_query stages
@@ -462,29 +591,60 @@ def apply_choices(config):
if task.should_skip():
continue
# Change metadata, move, and copy.
items = task.items if task.is_album else [task.item]
# Clear IDs in case the items are being re-tagged.
for item in items:
item.id = None
item.album_id = None
# Change metadata.
if task.should_write_tags():
if task.is_album:
autotag.apply_metadata(task.items, task.info)
else:
autotag.apply_item_metadata(task.item, task.info)
items = task.items if task.is_album else [task.item]
if config.copy and config.delete:
task.old_paths = [os.path.realpath(syspath(item.path))
for item in items]
# Infer album-level fields.
if task.is_album:
_infer_album_fields(task)
# Find existing item entries that these are replacing. Old
# album structures are automatically cleaned up when the
# last item is removed.
replaced_items = defaultdict(list)
for item in items:
dup_items = list(lib.items(
library.MatchQuery('path', item.path)
))
for dup_item in dup_items:
replaced_items[item].append(dup_item)
log.debug('replacing item %i: %s' % (dup_item.id, item.path))
log.debug('%i of %i items replaced' % (len(replaced_items),
len(items)))
# Move/copy files.
task.old_paths = [item.path for item in items]
for item in items:
if config.copy:
item.move(lib, True, task.is_album)
# If we're replacing an item, then move rather than
# copying.
do_copy = not bool(replaced_items[item])
lib.move(item, do_copy, task.is_album)
if config.write and task.should_write_tags():
item.write()
# Add items to library. We consolidate this at the end to avoid
# locking while we do the copying and tag updates.
try:
# Remove old items.
for replaced in replaced_items.itervalues():
for item in replaced:
lib.remove(item)
# Add new ones.
if task.is_album:
# Add an album.
album = lib.add_album(task.items,
infer_aa = task.should_infer_aa())
album = lib.add_album(task.items)
task.album_id = album.id
else:
# Add tracks.
@@ -505,7 +665,7 @@ def fetch_art(config):
continue
if task.should_fetch_art():
artpath = beets.autotag.art.art_for_album(task.info)
artpath = lib.beets.autotag.art.art_for_album(task.info, task.path)
# Save the art if any was found.
if artpath:
@@ -526,6 +686,8 @@ def finalize(config):
if task.should_skip():
if config.resume is not False:
task.save_progress()
if config.incremental:
task.save_history()
continue
items = task.items if task.is_album else [task.item]
@@ -549,6 +711,8 @@ def finalize(config):
# Update progress.
if config.resume is not False:
task.save_progress()
if config.incremental:
task.save_history()
# Singleton pipeline stages.
@@ -582,17 +746,10 @@ def item_query(config):
log_choice(config, task)
# Duplicate check.
if task.choice_flag in (action.ASIS, action.APPLY):
if choice is action.ASIS:
artist = task.item.artist
title = task.item.title
else:
artist = task.info['artist']
title = task.info['title']
if _item_duplicate_check(lib, artist, title, recent):
tag_log(config.logfile, 'duplicate', task.item.path)
log.warn("This item is already in the library!")
task.set_choice(action.SKIP)
if _item_duplicate_check(lib, task, recent):
tag_log(config.logfile, 'duplicate', task.item.path)
log.warn("This item is already in the library!")
task.set_choice(action.SKIP)
def item_progress(config):
"""Skips the lookup and query stages in a non-autotagged singleton
@@ -619,7 +776,10 @@ def run_import(**kwargs):
config = ImportConfig(**kwargs)
# Set up the pipeline.
stages = [read_tasks(config)]
if config.query is None:
stages = [read_tasks(config)]
else:
stages = [query_tasks(config)]
if config.singletons:
# Singleton importer.
if config.autot:
+811
View File
@@ -0,0 +1,811 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Provides the basic, interface-agnostic workflow for importing and
autotagging music files.
"""
from __future__ import with_statement # Python 2.5
import os
import logging
import pickle
from collections import defaultdict
from beets import autotag
from beets import library
import beets.autotag.art
from beets import plugins
from beets.util import pipeline
from beets.util import syspath, normpath, plurality
from beets.util.enumeration import enum
# The choices that can be attached to an import task. MANUAL and
# MANUAL_ID are never stored on a task (ImportTask.set_choice asserts
# against them), and APPLY is only set internally when a match is
# chosen.
action = enum(
'SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID',
name='action'
)
# Passed to Pipeline.run_parallel by run_import for threaded imports.
QUEUE_SIZE = 128
# Pickled state dictionary holding resume progress and the incremental
# import history.
STATE_FILE = os.path.expanduser('~/.beetsstate')
# _infer_album_fields treats an as-is album as single-artist when the
# most common track artist appears on every track, or appears more than
# once and on at least this fraction of the tracks.
SINGLE_ARTIST_THRESH = 0.25
# Album artist given to as-is albums that fail the test above.
VARIOUS_ARTISTS = u'Various Artists'
# Global logger.
log = logging.getLogger('beets')
class ImportAbort(Exception):
    """Signals that the user chose to abort the tagging operation.
    `run_import` catches this exception and stops silently.
    """
# Utilities.
def tag_log(logfile, status, path):
    """Append a "<status> <path>" line to `logfile`. The status should
    describe what happened to the album (e.g., why it could not be
    tagged). When no logfile is given, nothing is written.
    """
    if not logfile:
        return
    entry = '%s %s' % (status, path)
    print >>logfile, entry
def log_choice(config, task):
    """Write the task's choice to the import log. Only as-is and skip
    choices produce a log line; any other choice is ignored.
    """
    # Albums are logged by directory, singletons by file.
    if task.is_album:
        target = task.path
    else:
        target = task.item.path

    flag = task.choice_flag
    if flag is action.ASIS:
        tag_log(config.logfile, 'asis', target)
    elif flag is action.SKIP:
        tag_log(config.logfile, 'skip', target)
def _reopen_lib(lib):
    """Return a Library usable from the calling thread. Because of
    limitations in SQLite, a Library is bound to the thread that
    created it, so a real Library is rebuilt from its own settings.
    Anything that is not a Library instance is returned unchanged.
    """
    if not isinstance(lib, library.Library):
        return lib
    return library.Library(
        lib.path,
        lib.directory,
        lib.path_formats,
        lib.art_filename,
    )
def _duplicate_check(lib, task, recent=None):
    """Return True if the album chosen for `task` is already present,
    either in the library or among the albums seen earlier in this
    session. `recent`, when given, is a set of (artist, album) pairs
    that is consulted and then extended on every call.

    Tasks whose choice is neither ASIS nor APPLY, and as-is tasks with
    no artist, are never reported as duplicates.
    """
    flag = task.choice_flag
    if flag is action.ASIS:
        artist, album = task.cur_artist, task.cur_album
    elif flag is action.APPLY:
        artist, album = task.info.artist, task.info.album
    else:
        return False

    if artist is None:
        # As-is import with no artist: nothing to compare against.
        return False

    # Albums already handled during this import.
    if recent is not None:
        key = (artist, album)
        if key in recent:
            return True
        recent.add(key)

    # Albums already in the library.
    task_paths = set(i.path for i in task.items)
    for candidate in lib.albums(artist=artist):
        if candidate.album != album:
            continue
        # An album made of exactly the same files is the one being
        # re-imported (it will be replaced), not a duplicate.
        if set(i.path for i in candidate.items()) != task_paths:
            return True
    return False
def _item_duplicate_check(lib, task, recent=None):
    """Return True if the item chosen for `task` is already present,
    either in the library or among the (artist, title) pairs collected
    in `recent` during this session. Tasks whose choice is neither ASIS
    nor APPLY are never reported as duplicates.
    """
    flag = task.choice_flag
    if flag is action.ASIS:
        source = task.item
    elif flag is action.APPLY:
        source = task.info
    else:
        return False
    artist, title = source.artist, source.title

    # Items already handled during this import.
    if recent is not None:
        key = (artist, title)
        if key in recent:
            return True
        recent.add(key)

    # Items already in the library.
    matches = lib.items(artist=artist, title=title)
    try:
        for found in matches:
            # A library entry for this very file is being re-imported,
            # so it does not count as a duplicate.
            if found.path != task.item.path:
                return True
    finally:
        matches.close()
    return False
def _infer_album_fields(task):
    """Fill in album-level metadata on every item of an album task.
    For as-is imports, the album artist and the "comp" flag are guessed
    from the track artists; for applied matches, a missing album artist
    (and album artist ID) is copied from the first item's artist fields.
    """
    assert task.is_album
    assert task.items

    changes = {}
    if task.choice_flag == action.ASIS:
        # Guess whether this is a various-artists album from how
        # dominant the most common track artist is.
        plur_artist, freq = plurality([i.artist for i in task.items])
        total = len(task.items)
        dominant = freq > 1 and float(freq) / total >= SINGLE_ARTIST_THRESH
        if freq == total or dominant:
            # Single-artist album.
            changes['albumartist'] = plur_artist
            changes['comp'] = False
        else:
            # VA.
            changes['albumartist'] = VARIOUS_ARTISTS
            changes['comp'] = True

    elif task.choice_flag == action.APPLY:
        # Autotagged metadata: only fill in what the first item lacks.
        first = task.items[0]
        if not first.albumartist:
            changes['albumartist'] = first.artist
        if not first.mb_albumartistid:
            changes['mb_albumartistid'] = first.mb_artistid

    else:
        assert False

    # Stamp the inferred values onto every track.
    for item in task.items:
        for field, value in changes.iteritems():
            setattr(item, field, value)
def _open_state():
    """Read the pickled state dictionary from STATE_FILE.

    Returns an empty dictionary when the file is missing or unreadable,
    and also when it is empty, truncated, or not a valid pickle (for
    instance after a crash while the file was being written). Losing
    the saved progress is preferable to making every later import fail.
    """
    try:
        with open(STATE_FILE) as f:
            return pickle.load(f)
    except (IOError, EOFError, pickle.UnpicklingError):
        # IOError: no such file / cannot read it.
        # EOFError: empty or truncated file.
        # UnpicklingError: contents are not a valid pickle.
        return {}
def _save_state(state):
    """Pickle the `state` dictionary into STATE_FILE, replacing the
    file's previous contents.
    """
    with open(STATE_FILE, 'w') as out:
        pickle.dump(state, out)
# Utilities for reading and writing the beets progress file, which
# allows long tagging tasks to be resumed when they pause (or crash).
PROGRESS_KEY = 'tagprogress'
def progress_set(toppath, path):
    """Store `path` as the last successfully tagged directory under
    `toppath`. Passing None for `path` forgets any stored progress for
    `toppath`, which marks its tagging as complete.
    """
    state = _open_state()
    progress = state.setdefault(PROGRESS_KEY, {})

    if path is None:
        # Forget the entry; it is fine if there was none.
        progress.pop(toppath, None)
    else:
        progress[toppath] = path

    _save_state(state)
def progress_get(toppath):
    """Return the last successfully tagged subpath recorded for
    `toppath`, or None when no progress has been stored for it.
    """
    state = _open_state()
    try:
        progress = state[PROGRESS_KEY]
    except KeyError:
        return None
    return progress.get(toppath)
# Similarly, utilities for manipulating the "incremental" import log.
# This keeps track of all directories that were ever imported, which
# allows the importer to only import new stuff.
HISTORY_KEY = 'taghistory'
def history_add(path):
    """Record `path` in the import history so that incremental imports
    do not process it again.
    """
    state = _open_state()
    state.setdefault(HISTORY_KEY, set()).add(path)
    _save_state(state)
def history_get():
    """Return the set of paths recorded by earlier incremental imports
    (an empty set when there is no history yet).
    """
    return _open_state().get(HISTORY_KEY, set())
# The configuration structure.
class ImportConfig(object):
    """Holds every setting for one import session. Intended to be
    "write-once": all fields are supplied to the constructor and are
    not meant to be modified afterwards.
    """
    _fields = [
        'lib', 'paths', 'resume', 'logfile', 'color', 'quiet',
        'quiet_fallback', 'copy', 'write', 'art', 'delete',
        'choose_match_func', 'should_resume_func', 'threaded',
        'autot', 'singletons', 'timid', 'choose_item_func',
        'query', 'incremental',
    ]

    def __init__(self, **kwargs):
        """Populate the configuration from keyword arguments. Every
        name in `_fields` must be supplied; a missing one raises
        KeyError.
        """
        for name in self._fields:
            setattr(self, name, kwargs[name])

        # Normalize the paths.
        if self.paths:
            self.paths = [normpath(p) for p in self.paths]

        # Incremental imports never resume from saved progress.
        if self.incremental:
            self.resume = False

        # Query-based imports have no directories to track, so they
        # neither resume nor keep an incremental history.
        if self.query is not None:
            self.paths = None
            self.resume = False
            self.incremental = False
# The importer task class.
class ImportTask(object):
    """Represents a single set of items to be imported along with its
    intermediate state. May represent an album or a single item.

    Besides real tasks there are "sentinel" tasks, which carry no items
    and only mark that a directory (or a whole top-level path) has been
    fully emitted; see `done_sentinel` and `progress_sentinel`.
    """
    def __init__(self, toppath=None, path=None, items=None):
        # toppath: the top-level import path this task came from.
        # path: the directory the task covers (None for item tasks and
        #   "done" sentinels).
        # items: the items in the task (None for sentinels).
        self.toppath = toppath
        self.path = path
        self.items = items
        self.sentinel = False

    @classmethod
    def done_sentinel(cls, toppath):
        """Create an ImportTask that indicates the end of a top-level
        directory import.
        """
        obj = cls(toppath)
        obj.sentinel = True
        return obj

    @classmethod
    def progress_sentinel(cls, toppath, path):
        """Create a task indicating that a single directory in a larger
        import has finished. This is only required for singleton
        imports; progress is implied for album imports.
        """
        obj = cls(toppath, path)
        obj.sentinel = True
        return obj

    @classmethod
    def item_task(cls, item):
        """Creates an ImportTask for a single item."""
        obj = cls()
        obj.item = item
        obj.is_album = False
        return obj

    def set_match(self, cur_artist, cur_album, candidates, rec):
        """Sets the candidates for this album matched by the
        `autotag.tag_album` method. Also marks the task as an album
        task.
        """
        assert not self.sentinel
        self.cur_artist = cur_artist
        self.cur_album = cur_album
        self.candidates = candidates
        self.rec = rec
        self.is_album = True

    def set_null_match(self):
        """Set the candidates to indicate no album match was found.
        """
        self.set_match(None, None, None, None)

    def set_item_match(self, candidates, rec):
        """Set the match for a single-item task."""
        assert not self.is_album
        assert self.item is not None
        self.item_match = (candidates, rec)

    def set_null_item_match(self):
        """For single-item tasks, mark the item as having no matches.
        """
        assert not self.is_album
        assert self.item is not None
        self.item_match = None

    def set_choice(self, choice):
        """Record the action selected by the user (or automatically).
        `choice` is either one of the SKIP, ASIS, or TRACKS action
        constants or a match: an (info, items) tuple for album tasks,
        or just the info for single-item tasks. A match implies the
        APPLY action.
        """
        assert not self.sentinel
        # Not part of the task structure:
        assert choice not in (action.MANUAL, action.MANUAL_ID)
        assert choice != action.APPLY  # Only used internally.
        if choice in (action.SKIP, action.ASIS, action.TRACKS):
            self.choice_flag = choice
            self.info = None
        else:
            assert not isinstance(choice, action)
            if self.is_album:
                info, items = choice
                self.items = items  # Reordered items list.
            else:
                info = choice
            self.info = info
            self.choice_flag = action.APPLY  # Implicit choice.

    def save_progress(self):
        """Updates the progress state to indicate that this album has
        finished.
        """
        if self.sentinel and self.path is None:
            # "Done" sentinel.
            progress_set(self.toppath, None)
        elif self.sentinel or self.is_album:
            # "Directory progress" sentinel for singletons or a real
            # album task, which implies the same.
            progress_set(self.toppath, self.path)

    def save_history(self):
        """Save the directory in the history for incremental imports.
        Only tasks that cover a directory (album tasks and "directory
        progress" sentinels) are recorded.
        """
        if self.path is None:
            # "Done" sentinels and single-item tasks have no directory;
            # recording None would only clutter the history and rewrite
            # the state file for nothing.
            return
        if self.sentinel or self.is_album:
            history_add(self.path)

    # Logical decisions.
    def should_write_tags(self):
        """Should new info be written to the files' metadata?"""
        if self.choice_flag == action.APPLY:
            return True
        elif self.choice_flag in (action.ASIS, action.TRACKS, action.SKIP):
            return False
        else:
            assert False

    def should_fetch_art(self):
        """Should album art be downloaded for this album?"""
        return self.should_write_tags() and self.is_album

    def should_skip(self):
        """After a choice has been made, returns True if this is a
        sentinel or it has been marked for skipping.
        """
        return self.sentinel or self.choice_flag == action.SKIP
# Full-album pipeline stages.
def read_tasks(config):
    """Generate ImportTask objects for everything found under the
    user-specified paths: one task per album directory, or, for
    singleton imports, one task per item followed by a progress
    sentinel for its directory. A "done" sentinel closes each
    top-level directory.
    """
    # Decide which top-level paths resume from saved progress.
    progress = config.resume is not False
    if progress:
        resume_dirs = {}
        for path in config.paths:
            resume_dir = progress_get(path)
            if not resume_dir:
                continue

            # Either accept immediately or prompt for input to decide.
            if config.resume:
                do_resume = True
                log.warn('Resuming interrupted import of %s' % path)
            else:
                do_resume = config.should_resume_func(config, path)

            if do_resume:
                resume_dirs[path] = resume_dir
            else:
                # Clear progress; we're starting from the top.
                progress_set(path, None)

    # Directories already handled by earlier incremental imports.
    if config.incremental:
        history_dirs = history_get()

    for toppath in config.paths:
        # A singleton import may point directly at a file.
        if config.singletons and not os.path.isdir(syspath(toppath)):
            item = library.Item.from_path(toppath)
            yield ImportTask.item_task(item)
            continue

        # While set, directories are skipped until the one where the
        # previous run stopped has been passed.
        fast_forward = resume_dirs.get(toppath) if progress else None

        for path, items in autotag.albums_in_dir(toppath):
            if fast_forward:
                if path == fast_forward:
                    # Last good path reached; stop fast-forwarding.
                    fast_forward = None
                continue

            # When incremental, skip paths in the history.
            if config.incremental and path in history_dirs:
                continue

            if not config.singletons:
                yield ImportTask(toppath, path, items)
                continue

            for item in items:
                yield ImportTask.item_task(item)
            yield ImportTask.progress_sentinel(toppath, path)

        # Indicate the directory is finished.
        yield ImportTask.done_sentinel(toppath)
def query_tasks(config):
    """Generate import tasks from a library query instead of from the
    filesystem; a drop-in replacement for read_tasks. Singleton imports
    yield one task per matching item, otherwise one task per matching
    album.
    """
    lib = _reopen_lib(config.lib)

    if not config.singletons:
        # Search for albums.
        for album in lib.albums(config.query):
            log.debug('yielding album %i: %s - %s' %
                      (album.id, album.albumartist, album.album))
            album_items = list(album.items())
            yield ImportTask(None, album.item_dir(), album_items)
        return

    # Search for items. The results are collected into a list before
    # any task is yielded.
    for found in list(lib.items(config.query)):
        yield ImportTask.item_task(found)
def initial_lookup(config):
    """A coroutine that performs the initial autotagger lookup for
    album tasks. Each task sent in is yielded back with its match
    recorded via `set_match`, or with a null match when the lookup
    raises AutotagError. Sentinels pass through untouched.
    """
    task = None
    while True:
        task = yield task
        if not task.sentinel:
            log.debug('Looking up: %s' % task.path)
            try:
                task.set_match(*autotag.tag_album(task.items, config.timid))
            except autotag.AutotagError:
                task.set_null_match()
def user_query(config):
    """A coroutine that asks the user (through
    `config.choose_match_func`) what to do with each album task,
    records and logs the choice, and skips albums that turn out to be
    duplicates. It accepts and yields ImportTask objects; when the user
    picks "as tracks", the album is expanded into single-item tasks.
    """
    lib = _reopen_lib(config.lib)
    recent = set()
    task = None
    while True:
        task = yield task
        if task.sentinel:
            continue

        # Ask the user for a choice.
        choice = config.choose_match_func(task, config)
        task.set_choice(choice)
        log_choice(config, task)

        if choice is action.TRACKS:
            # Run the album's items through a small singleton pipeline
            # and hand all the resulting tasks on at once.
            item_tasks = []

            def produce():
                for item in task.items:
                    yield ImportTask.item_task(item)
                yield ImportTask.progress_sentinel(task.toppath, task.path)

            def gather():
                while True:
                    finished = yield
                    item_tasks.append(finished)

            sub_pipeline = pipeline.Pipeline((produce(), item_lookup(config),
                                              item_query(config), gather()))
            sub_pipeline.run_sequential()
            task = pipeline.multiple(item_tasks)
            continue

        # Check for duplicates if we have a match (or ASIS).
        if _duplicate_check(lib, task, recent):
            tag_log(config.logfile, 'duplicate', task.path)
            log.warn("This album is already in the library!")
            task.set_choice(action.SKIP)
def show_progress(config):
    """A coroutine standing in for the initial_lookup and user_query
    stages when autotagging is off. It logs each album's path and marks
    the task as an as-is import with no match.
    """
    task = None
    while True:
        task = yield task
        if not task.sentinel:
            log.info(task.path)

            # Behave as if ASIS were selected.
            task.set_null_match()
            task.set_choice(action.ASIS)
def apply_choices(config):
    """A coroutine that carries out the choice made for each task:
    applies metadata, moves or copies the files, writes tags, and adds
    the items to the library (replacing existing entries for the same
    files). Skipped tasks and sentinels pass through untouched.
    """
    lib = _reopen_lib(config.lib)
    task = None
    while True:
        task = yield task
        if task.should_skip():
            continue

        if task.is_album:
            items = task.items
        else:
            items = [task.item]

        # Clear IDs in case the items are being re-tagged.
        for item in items:
            item.id = None
            item.album_id = None

        # Change metadata.
        if task.should_write_tags():
            if task.is_album:
                autotag.apply_metadata(task.items, task.info)
            else:
                autotag.apply_item_metadata(task.item, task.info)

        # Infer album-level fields.
        if task.is_album:
            _infer_album_fields(task)

        # Map each incoming item to the library entries that already
        # point at the same file. Old album structures are cleaned up
        # automatically when their last item is removed.
        replaced_items = defaultdict(list)
        for item in items:
            same_path = library.MatchQuery('path', item.path)
            for dup_item in list(lib.items(same_path)):
                replaced_items[item].append(dup_item)
                log.debug('replacing item %i: %s' % (dup_item.id, item.path))
        log.debug('%i of %i items replaced' % (len(replaced_items),
                                               len(items)))

        # Move/copy files, remembering where they were beforehand.
        task.old_paths = [item.path for item in items]
        for item in items:
            if config.copy:
                # A file that replaces an existing entry is moved
                # rather than copied.
                do_copy = not replaced_items[item]
                lib.move(item, do_copy, task.is_album)
            if config.write and task.should_write_tags():
                item.write()

        # Library changes are consolidated at the end to avoid locking
        # while copying and updating tags.
        try:
            # Remove old items.
            for old_entries in replaced_items.itervalues():
                for old_entry in old_entries:
                    lib.remove(old_entry)

            # Add new ones.
            if task.is_album:
                album = lib.add_album(task.items)
                task.album_id = album.id
            else:
                for item in items:
                    lib.add(item)
        finally:
            lib.save()
def fetch_art(config):
    """A coroutine that looks up album art for tasks that call for it
    and, when art is found, attaches it to the imported album.
    """
    lib = _reopen_lib(config.lib)
    task = None
    while True:
        task = yield task
        if task.should_skip() or not task.should_fetch_art():
            continue

        artpath = beets.autotag.art.art_for_album(task.info, task.path)
        if not artpath:
            # No art was found.
            continue

        try:
            album = lib.get_album(task.album_id)
            album.set_art(artpath)
        finally:
            lib.save(False)
def finalize(config):
    """A coroutine that finishes up importer tasks. In particular, the
    coroutine sends plugin events, deletes old files, and saves
    progress and history. This is a "terminal" coroutine (it yields
    None).
    """
    lib = _reopen_lib(config.lib)

    while True:
        task = yield
        if task.should_skip():
            # Skipped tasks and sentinels still count as progress.
            if config.resume is not False:
                task.save_progress()
            if config.incremental:
                task.save_history()
            continue

        items = task.items if task.is_album else [task.item]

        # Announce that we've added an album.
        if task.is_album:
            album = lib.get_album(task.album_id)
            plugins.send('album_imported', lib=lib, album=album)
        else:
            for item in items:
                plugins.send('item_imported', lib=lib, item=item)

        # Finally, delete old files.
        if config.copy and config.delete:
            new_paths = set(os.path.realpath(item.path) for item in items)
            for old_path in task.old_paths:
                # Only delete files that were actually copied elsewhere.
                # Both sides are resolved with realpath: the stored old
                # paths are raw item paths, and a file reached through
                # a symlink must not look "moved" and get deleted.
                if os.path.realpath(old_path) in new_paths:
                    continue
                old_syspath = syspath(old_path)
                # Files replacing existing library entries are moved,
                # not copied, so the old path may already be gone.
                if os.path.lexists(old_syspath):
                    os.remove(old_syspath)

        # Update progress.
        if config.resume is not False:
            task.save_progress()
        if config.incremental:
            task.save_history()
# Singleton pipeline stages.
def item_lookup(config):
    """A coroutine that performs the initial autotagger lookup for
    single-item tasks, storing the result with `set_item_match`.
    Sentinels pass through untouched.
    """
    task = None
    while True:
        task = yield task
        if not task.sentinel:
            task.set_item_match(*autotag.tag_item(task.item, config.timid))
def item_query(config):
    """A coroutine that asks the user (through
    `config.choose_item_func`) what to do with each single-item task,
    records and logs the choice, and skips items that are duplicates.
    """
    lib = _reopen_lib(config.lib)
    task = None
    recent = set()
    while True:
        task = yield task
        if not task.sentinel:
            choice = config.choose_item_func(task, config)
            task.set_choice(choice)
            log_choice(config, task)

            # Duplicate check.
            if _item_duplicate_check(lib, task, recent):
                tag_log(config.logfile, 'duplicate', task.item.path)
                log.warn("This item is already in the library!")
                task.set_choice(action.SKIP)
def item_progress(config):
    """A coroutine standing in for the lookup and query stages of a
    non-autotagged singleton import. It logs each item's path and marks
    the task as an as-is import with no match.
    """
    task = None
    log.info('Importing items:')
    while True:
        task = yield task
        if not task.sentinel:
            log.info(task.item.path)
            task.set_null_item_match()
            task.set_choice(action.ASIS)
# Main driver.
def run_import(**kwargs):
    """Run an import. The keyword arguments are passed straight to
    ImportConfig.

    Builds the list of pipeline stages from the configuration and runs
    them, in parallel when config.threaded is set and sequentially
    otherwise. An ImportAbort raised while running ends the import
    quietly.
    """
    config = ImportConfig(**kwargs)

    # First stage: where the tasks come from.
    if config.query is None:
        source = read_tasks
    else:
        source = query_tasks
    stages = [source(config)]

    # Middle stages: tagging (or just progress display) for either
    # singletons or whole albums.
    if config.singletons:
        if config.autot:
            tagging = (item_lookup, item_query)
        else:
            tagging = (item_progress,)
    else:
        if config.autot:
            # Only look up and query the user when autotagging.
            tagging = (initial_lookup, user_query)
        else:
            # When not autotagging, just display progress.
            tagging = (show_progress,)
    for make_stage in tagging:
        stages.append(make_stage(config))

    # Final stages, shared by every kind of import.
    stages.append(apply_choices(config))
    if config.art:
        stages.append(fetch_art(config))
    stages.append(finalize(config))

    pl = pipeline.Pipeline(stages)

    # Run the pipeline.
    try:
        if config.threaded:
            pl.run_parallel(QUEUE_SIZE)
        else:
            pl.run_sequential()
    except ImportAbort:
        # User aborted operation. Silently stop.
        pass
+172 -115
View File
@@ -15,14 +15,13 @@
import sqlite3
import os
import re
import shutil
import sys
from string import Template
import logging
from lib.beets.mediafile import MediaFile
from lib.beets import plugins
from lib.beets import util
from lib.beets.util import bytestring_path, syspath, normpath
from lib.beets.util import bytestring_path, syspath, normpath, samefile
MAX_FILENAME_LENGTH = 200
@@ -62,6 +61,7 @@ ITEM_FIELDS = [
('mb_artistid', 'text', True, True),
('mb_albumartistid', 'text', True, True),
('albumtype', 'text', True, True),
('label', 'text', True, True),
('length', 'real', False, True),
('bitrate', 'int', False, True),
@@ -90,6 +90,7 @@ ALBUM_FIELDS = [
('mb_albumid', 'text', True),
('mb_albumartistid', 'text', True),
('albumtype', 'text', True),
('label', 'text', True),
]
ALBUM_KEYS = [f[0] for f in ALBUM_FIELDS]
ALBUM_KEYS_ITEM = [f[0] for f in ALBUM_FIELDS if f[2]]
@@ -204,46 +205,20 @@ class Item(object):
"""
f = MediaFile(syspath(self.path))
for key in ITEM_KEYS_WRITABLE:
if getattr(self, key): #make sure it has a value before we set it and create blank tags with wrong types
setattr(f, key, getattr(self, key))
setattr(f, key, getattr(self, key))
f.save()
# Dealing with files themselves.
def move(self, library, copy=False, in_album=False):
"""Move the item to its designated location within the library
directory (provided by destination()). Subdirectories are
created as needed. If the operation succeeds, the item's path
field is updated to reflect the new location.
If copy is True, moving the file is copied rather than moved.
If in_album is True, then the track is treated as part of an
album even if it does not yet have an album_id associated with
it. (This allows items to be moved before they are added to the
database, a performance optimization.)
Passes on appropriate exceptions if directories cannot be created
or moving/copying fails.
Note that one should almost certainly call store() and
library.save() after this method in order to keep on-disk data
consistent.
# Files themselves.
def move(self, dest, copy=False):
"""Moves or copies the item's file, updating the path value if
the move succeeds.
"""
dest = library.destination(self, in_album=in_album)
# Create necessary ancestry for the move.
util.mkdirall(dest)
if not shutil._samefile(syspath(self.path), syspath(dest)):
if copy:
# copyfile rather than copy will not copy permissions
# bits, thus possibly making the copy writable even when
# the original is read-only.
shutil.copyfile(syspath(self.path), syspath(dest))
else:
shutil.move(syspath(self.path), syspath(dest))
if copy:
util.copy(self.path, dest)
else:
util.move(self.path, dest)
# Either copying or moving succeeded, so update the stored path.
self.path = dest
@@ -380,56 +355,61 @@ class CollectionQuery(Query):
clause = (' ' + joiner + ' ').join(clause_parts)
return clause, subvals
# regular expression for _parse_query, below
_pq_regex = re.compile(r'(?:^|(?<=\s))' # zero-width match for whitespace
# or beginning of string
# non-grouping optional segment for the keyword
# regular expression for _parse_query_part, below
_pq_regex = re.compile(# non-grouping optional segment for the keyword
r'(?:'
r'(\S+?)' # the keyword
r'(?<!\\):' # unescaped :
r')?'
r'(\S+)', # the term itself
r'(.+)', # the term itself
re.I) # case-insensitive
@classmethod
def _parse_query(cls, query_string):
"""Takes a query in the form of a whitespace-separated list of
search terms that may be preceded with a key followed by a
colon. Returns a list of pairs (key, term) where key is None if
the search term has no key.
def _parse_query_part(cls, part):
"""Takes a query in the form of a key/value pair separated by a
colon. Returns pair (key, term) where key is None if the search
term has no key.
For instance,
parse_query('stapler color:red') ==
[(None, 'stapler'), ('color', 'red')]
parse_query('stapler') == (None, 'stapler')
parse_query('color:red') == ('color', 'red')
Colons may be 'escaped' with a backslash to disable the keying
behavior.
"""
out = []
for match in cls._pq_regex.finditer(query_string):
out.append((match.group(1), match.group(2).replace(r'\:',':')))
return out
part = part.strip()
match = cls._pq_regex.match(part)
if match:
return match.group(1), match.group(2).replace(r'\:', ':')
@classmethod
def from_string(cls, query_string, default_fields=None, all_keys=ITEM_KEYS):
"""Creates a query from a string in the format used by
_parse_query. If default_fields are specified, they are the
def from_strings(cls, query_parts, default_fields=None, all_keys=ITEM_KEYS):
"""Creates a query from a list of strings in the format used by
_parse_query_part. If default_fields are specified, they are the
fields to be searched by unqualified search terms. Otherwise,
all fields are searched for those terms.
"""
subqueries = []
for key, pattern in cls._parse_query(query_string):
if key is None: # no key specified; match any field
subqueries.append(AnySubstringQuery(pattern, default_fields))
for part in query_parts:
res = cls._parse_query_part(part)
if not res:
continue
key, pattern = res
if key is None: # No key specified.
if os.sep in pattern:
# This looks like a path.
subqueries.append(PathQuery(pattern))
else:
# Match any field.
subqueries.append(AnySubstringQuery(pattern,
default_fields))
elif key.lower() == 'comp': # a boolean field
subqueries.append(BooleanQuery(key.lower(), pattern))
elif key.lower() == 'path':
subqueries.append(PathQuery(pattern))
elif key.lower() in all_keys: # ignore unrecognized keys
subqueries.append(SubstringQuery(key.lower(), pattern))
elif key.lower() == 'singleton':
subqueries.append(SingletonQuery(util.str2bool(pattern)))
elif key.lower() == 'path':
subqueries.append(PathQuery(pattern))
if not subqueries: # no terms in query
subqueries = [TrueQuery()]
return cls(subqueries)
@@ -491,8 +471,10 @@ class TrueQuery(Query):
class PathQuery(Query):
"""A query that matches all items under a given path."""
def __init__(self, path):
self.file_path = normpath(path) # As a file.
self.dir_path = os.path.join(path, '') # As a directory (prefix).
# Match the path as a single file.
self.file_path = normpath(path)
# As a directory (prefix).
self.dir_path = os.path.join(self.file_path, '')
def match(self, item):
return (item.path == self.file_path) or \
@@ -500,7 +482,8 @@ class PathQuery(Query):
def clause(self):
dir_pat = self.dir_path + '%'
return '(path = ?) || (path LIKE ?)', (self.file_path, dir_pat)
file_blob = buffer(bytestring_path(self.file_path))
return '(path = ?) || (path LIKE ?)', (file_blob, dir_pat)
class ResultIterator(object):
"""An iterator into an item query result set."""
@@ -540,9 +523,10 @@ class BaseLibrary(object):
@classmethod
def _get_query(cls, val=None, album=False):
"""Takes a value which may be None, a query string, or a Query
object, and returns a suitable Query object. album determines
whether the query is to match items or albums.
"""Takes a value which may be None, a query string, a query
string list, or a Query object, and returns a suitable Query
object. album determines whether the query is to match items
or albums.
"""
if album:
default_fields = ALBUM_DEFAULT_FIELDS
@@ -551,10 +535,15 @@ class BaseLibrary(object):
default_fields = ITEM_DEFAULT_FIELDS
all_keys = ITEM_KEYS
# Convert a single string into a list of space-separated
# criteria.
if isinstance(val, basestring):
val = val.split()
if val is None:
return TrueQuery()
elif isinstance(val, basestring):
return AndQuery.from_string(val, default_fields, all_keys)
elif isinstance(val, list) or isinstance(val, tuple):
return AndQuery.from_strings(val, default_fields, all_keys)
elif isinstance(val, Query):
return val
elif not isinstance(val, Query):
@@ -711,8 +700,11 @@ class Library(BaseLibrary):
art_filename='cover',
item_fields=ITEM_FIELDS,
album_fields=ALBUM_FIELDS):
self.path = bytestring_path(path)
self.directory = bytestring_path(directory)
if path == ':memory:':
self.path = path
else:
self.path = bytestring_path(normpath(path))
self.directory = bytestring_path(normpath(directory))
if path_formats is None:
path_formats = {'default': '$artist/$album/$track $title'}
elif isinstance(path_formats, basestring):
@@ -770,13 +762,15 @@ class Library(BaseLibrary):
self.conn.executescript(setup_sql)
self.conn.commit()
def destination(self, item, pathmod=None, in_album=False, fragment=False):
def destination(self, item, pathmod=None, in_album=False,
fragment=False, basedir=None):
"""Returns the path in the library directory designated for item
item (i.e., where the file ought to be). in_album forces the
item to be treated as part of an album. fragment makes this
method return just the path fragment underneath the root library
directory; the path is also returned as Unicode instead of
encoded as a bytestring.
encoded as a bytestring. basedir can override the library's base
directory for the destination.
"""
pathmod = pathmod or os.path
@@ -836,16 +830,17 @@ class Library(BaseLibrary):
if fragment:
return subpath
else:
return normpath(os.path.join(self.directory, subpath))
basedir = basedir or self.directory
return normpath(os.path.join(basedir, subpath))
# Main interface.
# Item manipulation.
def add(self, item, copy=False):
#FIXME make a deep copy of the item?
item.library = self
if copy:
item.move(self, copy=True)
self.move(item, copy=True)
# build essential parts of query
columns = ','.join([key for key in ITEM_KEYS if key != 'id'])
@@ -937,6 +932,53 @@ class Library(BaseLibrary):
if delete:
util.soft_remove(item.path)
util.prune_dirs(os.path.dirname(item.path), self.directory)
def move(self, item, copy=False, in_album=False, basedir=None,
         with_album=True):
    """Relocate an item's file to the place the library designates
    for it (as computed by destination()), creating missing parent
    directories as needed. Once the operation succeeds, the item's
    path field refers to the new location.

    copy -- when true, the file is copied instead of moved.

    in_album -- when true, the track is treated as part of an album
    even if it has no album_id yet. (This lets items be moved before
    they are added to the database, a performance optimization.)

    basedir -- overrides the library base directory for the
    destination.

    with_album -- when true (the default) and the item is in an
    album, the album gets a chance to move its art as well.

    If the item is already in the database it is stored, so fields
    that were dirty before the call are written as a side effect. You
    probably want to call save() afterwards to commit the DB
    transaction.
    """
    target = self.destination(item, in_album=in_album, basedir=basedir)

    # The directories leading to the target may not exist yet.
    util.mkdirall(target)

    # Keep the original location so the vacated directory can be
    # pruned afterwards.
    origin = item.path
    item.move(target, copy)
    if item.id is not None:
        self.store(item)

    # Let the containing album, if there is one, bring its art along.
    album = self.get_album(item) if with_album else None
    if album:
        album.move_art(copy)

    # Copying leaves the source in place, so only a real move can
    # vacate the old directory.
    if not copy:
        util.prune_dirs(os.path.dirname(origin), self.directory)
# Querying.
@@ -1009,25 +1051,15 @@ class Library(BaseLibrary):
if record:
return Album(self, dict(record))
def add_album(self, items, infer_aa=False):
def add_album(self, items):
"""Create a new album in the database with metadata derived
from its items. The items are added to the database if they
don't yet have an ID. Returns an Album object. If the
infer_aa flag is set, then the album artist field will be
guessed from artist fields when not present.
don't yet have an ID. Returns an Album object.
"""
# Set the metadata from the first item.
#fixme: check for consensus?
item_values = dict(
(key, getattr(items[0], key)) for key in ALBUM_KEYS_ITEM)
if infer_aa:
namemap = {
'albumartist': 'artist',
'mb_albumartistid': 'mb_artistid',
}
for field, itemfield in namemap.iteritems():
if not item_values[field]:
item_values[field] = getattr(items[0], itemfield)
sql = 'INSERT INTO albums (%s) VALUES (%s)' % \
(', '.join(ALBUM_KEYS_ITEM),
@@ -1141,37 +1173,53 @@ class Album(BaseAlbum):
(self.id,)
)
def move(self, copy=False):
"""Moves (or copies) all items to their destination. Any
album art moves along with them.
def move_art(self, copy=False):
    """Move (or, when copy is true, copy) the album's existing art to
    the location given by art_destination(). Does nothing when the
    album has no art or the art is already at that location.
    """
    current = self.artpath
    if not current:
        return

    target = self.art_destination(current)
    if target == current:
        return

    log.debug('moving album art %s to %s' % (current, target))
    transfer = util.copy if copy else util.move
    transfer(current, target)
    self.artpath = target

    # Only a real move leaves the old location vacant.
    if not copy:
        util.prune_dirs(os.path.dirname(current),
                        self._library.directory)
def move(self, copy=False, basedir=None):
"""Moves (or copies) all items to their destination. Any album
art moves along with them. basedir overrides the library base
directory for the destination.
"""
basedir = basedir or self._library.directory
# Move items.
items = list(self.items())
for item in items:
item.move(self._library, copy)
newdir = os.path.dirname(items[0].path)
self._library.move(item, copy, basedir=basedir, with_album=False)
# Move art.
old_art = self.artpath
if old_art:
new_art = self.art_destination(old_art, newdir)
if new_art != old_art:
if copy:
shutil.copy(syspath(old_art), syspath(new_art))
else:
shutil.move(syspath(old_art), syspath(new_art))
self.artpath = new_art
# Store new item paths. We do this at the end to avoid
# locking the database for too long while files are copied.
for item in items:
self._library.store(item)
self.move_art(copy)
def item_dir(self):
"""Returns the directory containing the album's first item,
provided that such an item exists.
"""
item = self.items().next()
try:
item = self.items().next()
except StopIteration:
raise ValueError('empty album')
return os.path.dirname(item.path)
def art_destination(self, image, item_dir=None):
@@ -1196,8 +1244,17 @@ class Album(BaseAlbum):
path = bytestring_path(path)
oldart = self.artpath
artdest = self.art_destination(path)
if oldart and samefile(path, oldart):
# Art already set.
return
elif samefile(path, artdest):
# Art already in place.
self.artpath = path
return
# Normal operation.
if oldart == artdest:
util.soft_remove(oldart)
shutil.copyfile(syspath(path), syspath(artdest))
util.copy(path, artdest)
self.artpath = artdest
File diff suppressed because it is too large Load Diff
+32 -3
View File
@@ -39,11 +39,17 @@ import re
import base64
import imghdr
import os
import logging
import traceback
from lib.beets.util.enumeration import enum
__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile']
# Logger.
log = logging.getLogger('beets')
# Exceptions.
# Raised for any file MediaFile can't read.
@@ -382,6 +388,11 @@ class MediaField(object):
if style.packing:
out = Packed(out, style.packing)[style.pack_pos]
# MPEG-4 freeform frames are (should be?) encoded as UTF-8.
if obj.type == 'mp4' and style.key.startswith('----:') and \
isinstance(out, str):
out = out.decode('utf8')
return _safe_cast(self.out_type, out)
@@ -410,8 +421,8 @@ class MediaField(object):
out = u''
# We trust that packed values are handled above.
# convert to correct storage type (irrelevant for
# packed values)
# Convert to correct storage type (irrelevant for
# packed values).
if style.as_type == unicode:
if out is None:
out = u''
@@ -429,7 +440,13 @@ class MediaField(object):
elif style.as_type in (bool, str):
out = style.as_type(out)
# store the data
# MPEG-4 "freeform" (----) frames must be encoded as UTF-8
# byte strings.
if obj.type == 'mp4' and style.key.startswith('----:') and \
isinstance(out, unicode):
out = out.encode('utf8')
# Store the data.
self._storedata(obj, out, style)
class CompositeDateField(object):
@@ -619,9 +636,14 @@ class MediaFile(object):
try:
self.mgfile = lib.mutagen.File(path)
except unreadable_exc:
log.warn('header parsing failed')
raise UnreadableFileError('Mutagen could not read file')
except IOError:
raise UnreadableFileError('could not read file')
except:
# Hide bugs in Mutagen.
log.error('uncaught Mutagen exception:\n' + traceback.format_exc())
raise UnreadableFileError('Mutagen raised an exception')
if self.mgfile is None: # Mutagen couldn't guess the type
raise FileTypeError('file type unsupported by Mutagen')
@@ -799,6 +821,13 @@ class MediaFile(object):
'----:com.apple.iTunes:MusicBrainz Album Type'),
etc = StorageStyle('musicbrainz_albumtype')
)
label = MediaField(
mp3 = StorageStyle('TPUB'),
mp4 = [StorageStyle('----:com.apple.iTunes:Label'),
StorageStyle('----:com.apple.iTunes:publisher')],
etc = [StorageStyle('label'),
StorageStyle('publisher')] # Traktor
)
# Album art.
art = ImageField()
+885
View File
@@ -0,0 +1,885 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Handles low-level interfacing for files' tags. Wraps Mutagen to
automatically detect file types and provide a unified interface for a
useful subset of music files' tags.
Usage:
>>> f = MediaFile('Lucy.mp3')
>>> f.title
u'Lucy in the Sky with Diamonds'
>>> f.artist = 'The Beatles'
>>> f.save()
A field will always return a reasonable value of the correct type, even
if no tag is present. If no value is available, the value will be false
(e.g., zero or the empty string).
"""
import mutagen
import mutagen.mp3
import mutagen.oggvorbis
import mutagen.mp4
import mutagen.flac
import mutagen.monkeysaudio
import datetime
import re
import base64
import imghdr
import os
import logging
import traceback
from beets.util.enumeration import enum
__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile']
# Logger.
log = logging.getLogger('beets')
# Exceptions.
class UnreadableFileError(IOError):
    """Raised for any file MediaFile can't read."""


class FileTypeError(UnreadableFileError):
    """Raised for files that don't seem to have a type MediaFile
    supports.
    """
# Constants.
# Human-readable type names: maps each short type code (the keys) to the
# display name of the corresponding format.
TYPES = {
'mp3': 'MP3',
'mp4': 'AAC',
'ogg': 'OGG',
'flac': 'FLAC',
'ape': 'APE',
'wv': 'WavPack',
'mpc': 'Musepack',
}
# Utility.
def _safe_cast(out_type, val):
    """Tries to convert val to out_type but will never raise an
    exception. If the value can't be converted, then a sensible
    default value is returned. out_type should be bool, int, or
    unicode; otherwise, the value is just passed through.

    The defaults are 0 for int, False for bool, and the empty string
    for unicode. For int, a non-numeric value is converted to a string
    and the run of digits at its start (if any) is used.
    """
    if out_type == int:
        if val is None:
            return 0
        elif isinstance(val, (int, float)):
            # Just a number. NaN and the infinities have no integer
            # equivalent, so they fall back to the default.
            try:
                return int(val)
            except (ValueError, OverflowError):
                return 0
        else:
            # Process any other type as a string.
            if not isinstance(val, basestring):
                val = unicode(val)
            # Get a number from the front of the string.
            val = re.match('[0-9]*', val.strip()).group(0)
            if not val:
                return 0
            else:
                return int(val)

    elif out_type == bool:
        if val is None:
            return False
        else:
            try:
                # Should work for strings, bools, ints:
                return bool(int(val))
            except (ValueError, TypeError, OverflowError):
                # ValueError: not a number. TypeError: something int()
                # can't accept at all (a list, say). OverflowError: an
                # infinite float.
                return False

    elif out_type == unicode:
        if val is None:
            return u''
        else:
            return unicode(val)

    else:
        return val
# Flags for encoding field behavior.
# Determine style of packing, if any: the ways several values can be
# packed into a single stored tag value. StorageStyle.packing holds one
# of these members (or None when the value is not packed), and Packed
# uses it to decide how to split and rebuild the stored value.
packing = enum('SLASHED', # pair of values in one string, delimited by /
'TUPLE', # a python tuple of 2 items
'DATE', # date string of the form YYYY-MM-DD
name='packing')
class StorageStyle(object):
    """Describes how one field is stored in one particular tag format.

     - key: The Mutagen key used to access the field's data.
     - list_elem: Whether the value is stored as the first element of
       a list (true) or as a single object (false).
     - as_type: The type the value is stored as (unicode, int, bool,
       or str).
     - packing: When the value shares a multiple-value storage unit,
       the kind of packing (a member of the packing enum); otherwise
       None. (Makes as_type irrelevant.)
     - pack_pos: For a packed value, the position it is stored in.
     - id3_desc: ID3 storage only: match against this 'desc' field as
       well as the key.
     - id3_frame_field: ID3 storage only: the name of the frame
       attribute that holds the value.
    """

    def __init__(
        self,
        key,
        list_elem=True,
        as_type=unicode,
        packing=None,
        pack_pos=0,
        id3_desc=None,
        id3_frame_field='text',
    ):
        # Where and in what form the value lives.
        self.key = key
        self.as_type = as_type
        self.list_elem = list_elem

        # Packed (multiple-value) storage.
        self.packing = packing
        self.pack_pos = pack_pos

        # ID3-specific matching.
        self.id3_desc = id3_desc
        self.id3_frame_field = id3_frame_field
# Dealing with packings.
class Packed(object):
    """Makes a packed list of values subscriptable. To access the packed
    output after making changes, use packed_thing.items.
    """

    def __init__(self, items, packstyle, none_val=0, out_type=int):
        """Create a Packed object for subscripting the packed values in
        items. The items are packed using packstyle, which is a value
        from the packing enum. none_val is returned from a request when
        no suitable value is found in the items. Values are converted to
        out_type before they are returned.
        """
        self.items = items
        self.packstyle = packstyle
        self.none_val = none_val
        self.out_type = out_type

    def __getitem__(self, index):
        """Return the value stored at position index of the packing,
        converted to out_type. A missing, empty, or none_val entry
        yields none_val. Raises TypeError if index is not an integer.
        """
        if not isinstance(index, int):
            raise TypeError('index must be an integer')

        if self.items is None:
            return self.none_val

        items = self.items
        if self.packstyle == packing.DATE:
            # Remove time information from dates. Usually delimited by
            # a "T" or a space.
            items = re.sub(r'[Tt ].*$', '', unicode(items))

        # transform from a string packing into a list we can index into
        if self.packstyle == packing.SLASHED:
            seq = unicode(items).split('/')
        elif self.packstyle == packing.DATE:
            seq = unicode(items).split('-')
        elif self.packstyle == packing.TUPLE:
            seq = items # tuple: items is already indexable

        # Any failure to index (position out of range, items not
        # indexable, unknown packstyle) means "no value". Catch
        # Exception rather than everything so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        try:
            out = seq[index]
        except Exception:
            out = None

        if out is None or out == self.none_val or out == '':
            return _safe_cast(self.out_type, self.none_val)
        else:
            return _safe_cast(self.out_type, out)

    def __setitem__(self, index, value):
        """Set the value at position index and rebuild self.items in
        the form dictated by the packing style.
        """
        if self.packstyle in (packing.SLASHED, packing.TUPLE):
            # SLASHED and TUPLE are always two-item packings
            length = 2
        else:
            # DATE can have up to three fields
            length = 3

        # make a list of the items we'll pack
        new_items = []
        for i in range(length):
            if i == index:
                next_item = value
            else:
                next_item = self[i]
            new_items.append(next_item)

        if self.packstyle == packing.DATE:
            # Truncate the items wherever we reach an invalid (none)
            # entry. This prevents dates like 2008-00-05.
            for i, item in enumerate(new_items):
                if item == self.none_val or item is None:
                    del(new_items[i:]) # truncate
                    break

        if self.packstyle == packing.SLASHED:
            self.items = '/'.join(map(unicode, new_items))
        elif self.packstyle == packing.DATE:
            field_lengths = [4, 2, 2] # YYYY-MM-DD
            elems = []
            for i, item in enumerate(new_items):
                # Zero-pad each component to its field width.
                elems.append( ('%0' + str(field_lengths[i]) + 'i') % item )
            self.items = '-'.join(elems)
        elif self.packstyle == packing.TUPLE:
            self.items = new_items
# The field itself.
class MediaField(object):
    """A descriptor providing access to a particular (abstract) metadata
    field. out_type is the type that users of MediaFile should see and
    can be unicode, int, or bool. The mp3, mp4, and etc keyword
    arguments are StorageStyle instances (or lists of them)
    parameterizing the field's storage for each type of file.
    """

    def __init__(self, out_type=unicode, **kwargs):
        """Creates a new MediaField.
         - out_type: The field's semantic (exterior) type.
         - kwargs: A hash whose keys are 'mp3', 'mp4', and 'etc'
           and whose values are StorageStyle instances
           parameterizing the field's storage for each type.

        Raises TypeError unless exactly those three keys are given.
        """
        self.out_type = out_type
        if not set(['mp3', 'mp4', 'etc']) == set(kwargs):
            raise TypeError('MediaField constructor must have keyword '
                            'arguments mp3, mp4, and etc')
        self.styles = kwargs

    def _fetchdata(self, obj, style):
        """Get the value associated with this descriptor's field stored
        with the given StorageStyle. Unwraps from a list if necessary.
        Returns None when the tag is absent.
        """
        # fetch the value, which may be a scalar or a list
        if obj.type == 'mp3':
            if style.id3_desc is not None: # also match on 'desc' field
                frames = obj.mgfile.tags.getall(style.key)
                entry = None
                for frame in frames:
                    if frame.desc == style.id3_desc:
                        entry = getattr(frame, style.id3_frame_field)
                        break
                if entry is None: # no desc match
                    return None
            else:
                # Get the metadata frame object.
                try:
                    frame = obj.mgfile[style.key]
                except KeyError:
                    return None
                entry = getattr(frame, style.id3_frame_field)

        else: # Not MP3.
            try:
                entry = obj.mgfile[style.key]
            except KeyError:
                return None

        # possibly index the list
        if style.list_elem:
            if entry: # List must have at least one value.
                return entry[0]
            else:
                return None
        else:
            return entry

    def _storedata(self, obj, val, style):
        """Store val for this descriptor's field in the tag dictionary
        according to the provided StorageStyle. Store it as a
        single-item list if necessary.
        """
        # wrap as a list if necessary
        if style.list_elem:
            out = [val]
        else:
            out = val

        if obj.type == 'mp3':
            # Try to match on "desc" field.
            if style.id3_desc is not None:
                frames = obj.mgfile.tags.getall(style.key)

                # try modifying in place
                found = False
                for frame in frames:
                    if frame.desc == style.id3_desc:
                        setattr(frame, style.id3_frame_field, out)
                        found = True
                        break

                # need to make a new frame?
                if not found:
                    assert isinstance(style.id3_frame_field, str) # Keyword.
                    frame = mutagen.id3.Frames[style.key](
                        encoding=3,
                        desc=style.id3_desc,
                        **{style.id3_frame_field: val}
                    )
                    obj.mgfile.tags.add(frame)

            # Try to match on "owner" field.
            elif style.key.startswith('UFID:'):
                owner = style.key.split(':', 1)[1]
                frames = obj.mgfile.tags.getall(style.key)

                # Replace existing frame data in place when a frame
                # with this owner is already present.
                found = False
                for frame in frames:
                    if frame.owner == owner:
                        setattr(frame, style.id3_frame_field, val)
                        found = True
                        break

                # Only build a new frame when nothing matched. Doing it
                # unconditionally would replace every UFID frame (via
                # setall) right after the in-place update.
                if not found:
                    assert isinstance(style.id3_frame_field, str) # Keyword.
                    frame = mutagen.id3.UFID(owner=owner,
                        **{style.id3_frame_field: val})
                    obj.mgfile.tags.setall('UFID', [frame])

            # Just replace based on key.
            else:
                assert isinstance(style.id3_frame_field, str) # Keyword.
                frame = mutagen.id3.Frames[style.key](encoding = 3,
                    **{style.id3_frame_field: val})
                obj.mgfile.tags.setall(style.key, [frame])

        else: # Not MP3.
            obj.mgfile[style.key] = out

    def _styles(self, obj):
        """Return the list of StorageStyles that apply to obj's file
        type.
        """
        if obj.type in ('mp3', 'mp4'):
            styles = self.styles[obj.type]
        else:
            styles = self.styles['etc'] # sane styles

        # Make sure we always return a list of styles, even when given
        # a single style for convenience.
        if isinstance(styles, StorageStyle):
            return [styles]
        else:
            return styles

    def __get__(self, obj, owner):
        """Retrieve the value of this metadata field.
        """
        # Fetch the data using the various StorageStyles.
        styles = self._styles(obj)
        for style in styles:
            # Use the first style that returns a reasonable value.
            out = self._fetchdata(obj, style)
            if out:
                break

        # Unpack using whichever style the loop stopped on (the last
        # one when nothing was found).
        if style.packing:
            out = Packed(out, style.packing)[style.pack_pos]

        # MPEG-4 freeform frames are (should be?) encoded as UTF-8.
        if obj.type == 'mp4' and style.key.startswith('----:') and \
                isinstance(out, str):
            out = out.decode('utf8')

        return _safe_cast(self.out_type, out)

    def __set__(self, obj, val):
        """Set the value of this metadata field.
        """
        # Store using every StorageStyle available.
        styles = self._styles(obj)
        for style in styles:

            if style.packing:
                # Merge the new value into whatever is already packed
                # alongside it.
                p = Packed(self._fetchdata(obj, style), style.packing)
                p[style.pack_pos] = val
                out = p.items

            else: # unicode, integer, or boolean scalar
                out = val

                # deal with Nones according to abstract type if present
                if out is None:
                    if self.out_type == int:
                        out = 0
                    elif self.out_type == bool:
                        out = False
                    elif self.out_type == unicode:
                        out = u''
                    # We trust that packed values are handled above.

                # Convert to correct storage type (irrelevant for
                # packed values).
                if style.as_type == unicode:
                    if out is None:
                        out = u''
                    else:
                        if self.out_type == bool:
                            # store bools as 1,0 instead of True,False
                            out = unicode(int(out))
                        else:
                            out = unicode(out)
                elif style.as_type == int:
                    if out is None:
                        out = 0
                    else:
                        out = int(out)
                elif style.as_type in (bool, str):
                    out = style.as_type(out)

            # MPEG-4 "freeform" (----) frames must be encoded as UTF-8
            # byte strings.
            if obj.type == 'mp4' and style.key.startswith('----:') and \
                    isinstance(out, unicode):
                out = out.encode('utf8')

            # Store the data.
            self._storedata(obj, out, style)
class CompositeDateField(object):
    """A MediaFile field that exposes the separate year, month, and day
    fields as a single datetime.date object, for both reading and
    writing.
    """

    def __init__(self, year_field, month_field, day_field):
        """Build a date field on top of the three MediaFields that hold
        its components.
        """
        self.year_field = year_field
        self.month_field = month_field
        self.day_field = day_field

    def __get__(self, obj, owner):
        """Return the smallest valid datetime.date whose components are
        at least as large as the three component fields (so year ==
        1999, month == 0, day == 0 gives datetime.date(1999, 1, 1)).
        If the components do not form a valid date (e.g., month == 47),
        datetime.date.min is returned.
        """
        try:
            # Clamp each component up to its smallest legal value.
            year = max(self.year_field.__get__(obj, owner),
                       datetime.MINYEAR)
            month = max(self.month_field.__get__(obj, owner), 1)
            day = max(self.day_field.__get__(obj, owner), 1)
            return datetime.date(year, month, day)
        except ValueError: # Out of range values.
            return datetime.date.min

    def __set__(self, obj, val):
        """Copy the year, month, and day of the given datetime.date
        object into the three component fields.
        """
        components = (
            (self.year_field, 'year'),
            (self.month_field, 'month'),
            (self.day_field, 'day'),
        )
        for field, attr in components:
            field.__set__(obj, getattr(val, attr))
class ImageField(object):
    """A descriptor providing access to a file's embedded album art.
    Holds a bytestring reflecting the image data. The image should
    either be a JPEG or a PNG for cross-format compatibility. It's
    probably a bad idea to use anything but these two formats.
    """

    @classmethod
    def _mime(cls, data):
        """Return the MIME type (either image/png or image/jpeg) of the
        image data (a bytestring).
        """
        kind = imghdr.what(None, h=data)
        if kind == 'png':
            return 'image/png'
        else:
            # Currently just fall back to JPEG.
            return 'image/jpeg'

    @classmethod
    def _mp4kind(cls, data):
        """Return the MPEG-4 image type code of the data. If the image
        is not a PNG or JPEG, JPEG is assumed.
        """
        kind = imghdr.what(None, h=data)
        if kind == 'png':
            return mutagen.mp4.MP4Cover.FORMAT_PNG
        else:
            return mutagen.mp4.MP4Cover.FORMAT_JPEG

    def __get__(self, obj, owner):
        """Return the embedded image data as a bytestring, or None when
        the file carries no art.
        """
        if obj.type == 'mp3':
            # Look for APIC frames.
            for frame in obj.mgfile.tags.values():
                if frame.FrameID == 'APIC':
                    picframe = frame
                    break
            else:
                # No APIC frame.
                return None

            return picframe.data

        elif obj.type == 'mp4':
            if 'covr' in obj.mgfile:
                covers = obj.mgfile['covr']
                if covers:
                    cover = covers[0]
                    # cover is an MP4Cover, which is a subclass of str.
                    return cover

            # No cover found.
            return None

        else:
            # Here we're assuming everything but MP3 and MPEG-4 uses
            # the Xiph/Vorbis Comments standard. This may not be valid.
            # http://wiki.xiph.org/VorbisComment#Cover_art

            if 'metadata_block_picture' not in obj.mgfile:
                # Try legacy COVERART tags.
                if 'coverart' in obj.mgfile and obj.mgfile['coverart']:
                    return base64.b64decode(obj.mgfile['coverart'][0])
                return None

            # Use the first picture block that decodes.
            for data in obj.mgfile["metadata_block_picture"]:
                try:
                    pic = mutagen.flac.Picture(base64.b64decode(data))
                    break
                except TypeError:
                    pass
            else:
                return None

            return pic.data

    def __set__(self, obj, val):
        """Replace the file's embedded art with val, a byte string, or
        remove the art when val is None. Raises ValueError for any
        other kind of value.
        """
        if val is not None:
            if not isinstance(val, str):
                raise ValueError('value must be a byte string or None')

        if obj.type == 'mp3':
            # Clear all APIC frames.
            obj.mgfile.tags.delall('APIC')
            if val is None:
                # If we're clearing the image, we're done.
                return

            picframe = mutagen.id3.APIC(
                encoding = 3,
                mime = self._mime(val),
                type = 3, # front cover
                desc = u'',
                data = val,
            )
            obj.mgfile['APIC'] = picframe

        elif obj.type == 'mp4':
            if val is None:
                if 'covr' in obj.mgfile:
                    del obj.mgfile['covr']
            else:
                cover = mutagen.mp4.MP4Cover(val, self._mp4kind(val))
                obj.mgfile['covr'] = [cover]

        else:
            # Again, assuming Vorbis Comments standard.

            # Strip all art, including legacy COVERART. Each tag is
            # checked on its own so that the legacy tags are removed
            # even when there is no metadata_block_picture.
            for key in ('metadata_block_picture', 'coverart',
                        'coverartmime'):
                if key in obj.mgfile:
                    del obj.mgfile[key]

            # Add new art if provided.
            if val is not None:
                pic = mutagen.flac.Picture()
                pic.data = val
                pic.mime = self._mime(val)
                obj.mgfile['metadata_block_picture'] = [
                    base64.b64encode(pic.write())
                ]
# The file (a collection of fields).
class MediaFile(object):
    """Represents a multimedia file on disk and provides access to its
    metadata.

    Each tag is exposed as a class-level descriptor (MediaField,
    CompositeDateField or ImageField) that reads and writes the
    underlying Mutagen file object stored in `self.mgfile`. `self.type`
    holds the short format code used to pick a storage style.
    """

    def __init__(self, path):
        """Constructs a new MediaFile reflecting the file at path. May
        throw UnreadableFileError (the file could not be parsed) or
        FileTypeError (the container format is not supported).
        """
        self.path = path

        unreadable_exc = (
            mutagen.mp3.HeaderNotFoundError,
            mutagen.flac.FLACNoHeaderError,
            mutagen.monkeysaudio.MonkeysAudioHeaderError,
            mutagen.mp4.MP4StreamInfoError,
            mutagen.oggvorbis.OggVorbisHeaderError,
        )
        try:
            self.mgfile = mutagen.File(path)
        except unreadable_exc:
            log.warn('header parsing failed')
            raise UnreadableFileError('Mutagen could not read file')
        except IOError:
            raise UnreadableFileError('could not read file')
        except Exception:
            # Hide bugs in Mutagen. Only Exception subclasses are
            # caught so that KeyboardInterrupt and SystemExit still
            # propagate instead of being reported as unreadable files.
            log.error('uncaught Mutagen exception:\n' + traceback.format_exc())
            raise UnreadableFileError('Mutagen raised an exception')

        if self.mgfile is None: # Mutagen couldn't guess the type
            raise FileTypeError('file type unsupported by Mutagen')

        # Map the Mutagen class name onto our short type code.
        type_codes = {
            'M4A': 'mp4',
            'MP4': 'mp4',
            'ID3': 'mp3',
            'MP3': 'mp3',
            'FLAC': 'flac',
            'OggVorbis': 'ogg',
            'MonkeysAudio': 'ape',
            'WavPack': 'wv',
            'Musepack': 'mpc',
        }
        mg_name = type(self.mgfile).__name__
        if mg_name not in type_codes:
            raise FileTypeError('file type %s unsupported by MediaFile' %
                                mg_name)
        self.type = type_codes[mg_name]

        # add a set of tags if it's missing
        if self.mgfile.tags is None:
            self.mgfile.add_tags()

    def save(self):
        """Write the (possibly modified) tags back to the file."""
        self.mgfile.save()

    #### field definitions ####

    title = MediaField(
                mp3 = StorageStyle('TIT2'),
                mp4 = StorageStyle("\xa9nam"),
                etc = StorageStyle('title'),
            )
    artist = MediaField(
                mp3 = StorageStyle('TPE1'),
                mp4 = StorageStyle("\xa9ART"),
                etc = StorageStyle('artist'),
            )
    album = MediaField(
                mp3 = StorageStyle('TALB'),
                mp4 = StorageStyle("\xa9alb"),
                etc = StorageStyle('album'),
            )
    genre = MediaField(
                mp3 = StorageStyle('TCON'),
                mp4 = StorageStyle("\xa9gen"),
                etc = StorageStyle('genre'),
            )
    composer = MediaField(
                mp3 = StorageStyle('TCOM'),
                mp4 = StorageStyle("\xa9wrt"),
                etc = StorageStyle('composer'),
            )
    grouping = MediaField(
                mp3 = StorageStyle('TIT1'),
                mp4 = StorageStyle("\xa9grp"),
                etc = StorageStyle('grouping'),
            )

    # The release date is stored packed in a single tag; year, month
    # and day each read one position of it.
    year = MediaField(out_type=int,
                mp3 = StorageStyle('TDRC',
                                   packing = packing.DATE,
                                   pack_pos = 0),
                mp4 = StorageStyle("\xa9day",
                                   packing = packing.DATE,
                                   pack_pos = 0),
                etc = [StorageStyle('date',
                                    packing = packing.DATE,
                                    pack_pos = 0),
                       StorageStyle('year')]
            )
    month = MediaField(out_type=int,
                mp3 = StorageStyle('TDRC',
                                   packing = packing.DATE,
                                   pack_pos = 1),
                mp4 = StorageStyle("\xa9day",
                                   packing = packing.DATE,
                                   pack_pos = 1),
                etc = StorageStyle('date',
                                   packing = packing.DATE,
                                   pack_pos = 1)
            )
    day = MediaField(out_type=int,
                mp3 = StorageStyle('TDRC',
                                   packing = packing.DATE,
                                   pack_pos = 2),
                mp4 = StorageStyle("\xa9day",
                                   packing = packing.DATE,
                                   pack_pos = 2),
                etc = StorageStyle('date',
                                   packing = packing.DATE,
                                   pack_pos = 2)
            )
    date = CompositeDateField(year, month, day)

    track = MediaField(out_type = int,
                mp3 = StorageStyle('TRCK',
                                   packing = packing.SLASHED,
                                   pack_pos = 0),
                mp4 = StorageStyle('trkn',
                                   packing = packing.TUPLE,
                                   pack_pos = 0),
                etc = [StorageStyle('track'),
                       StorageStyle('tracknumber')]
            )
    tracktotal = MediaField(out_type = int,
                mp3 = StorageStyle('TRCK',
                                   packing = packing.SLASHED,
                                   pack_pos = 1),
                mp4 = StorageStyle('trkn',
                                   packing = packing.TUPLE,
                                   pack_pos = 1),
                etc = [StorageStyle('tracktotal'),
                       StorageStyle('trackc'),
                       StorageStyle('totaltracks')]
            )
    disc = MediaField(out_type = int,
                mp3 = StorageStyle('TPOS',
                                   packing = packing.SLASHED,
                                   pack_pos = 0),
                mp4 = StorageStyle('disk',
                                   packing = packing.TUPLE,
                                   pack_pos = 0),
                etc = [StorageStyle('disc'),
                       StorageStyle('discnumber')]
            )
    disctotal = MediaField(out_type = int,
                mp3 = StorageStyle('TPOS',
                                   packing = packing.SLASHED,
                                   pack_pos = 1),
                mp4 = StorageStyle('disk',
                                   packing = packing.TUPLE,
                                   pack_pos = 1),
                etc = [StorageStyle('disctotal'),
                       StorageStyle('discc'),
                       StorageStyle('totaldiscs')]
            )
    lyrics = MediaField(
                mp3 = StorageStyle('USLT',
                                   list_elem = False,
                                   id3_desc = u''),
                mp4 = StorageStyle("\xa9lyr"),
                etc = StorageStyle('lyrics')
            )
    comments = MediaField(
                mp3 = StorageStyle('COMM', id3_desc = u''),
                mp4 = StorageStyle("\xa9cmt"),
                etc = [StorageStyle('description'),
                       StorageStyle('comment')]
            )
    bpm = MediaField(out_type = int,
                mp3 = StorageStyle('TBPM'),
                mp4 = StorageStyle('tmpo', as_type = int),
                etc = StorageStyle('bpm')
            )
    comp = MediaField(out_type = bool,
                mp3 = StorageStyle('TCMP'),
                mp4 = StorageStyle('cpil',
                                   list_elem = False,
                                   as_type = bool),
                etc = StorageStyle('compilation')
            )
    albumartist = MediaField(
                mp3 = StorageStyle('TPE2'),
                mp4 = StorageStyle('aART'),
                etc = [StorageStyle('album artist'),
                       StorageStyle('albumartist')]
            )
    albumtype = MediaField(
                mp3 = StorageStyle('TXXX', id3_desc=u'MusicBrainz Album Type'),
                mp4 = StorageStyle(
                        '----:com.apple.iTunes:MusicBrainz Album Type'),
                etc = StorageStyle('musicbrainz_albumtype')
            )
    label = MediaField(
                mp3 = StorageStyle('TPUB'),
                mp4 = [StorageStyle('----:com.apple.iTunes:Label'),
                       StorageStyle('----:com.apple.iTunes:publisher')],
                etc = [StorageStyle('label'),
                       StorageStyle('publisher')] # Traktor
            )

    # Album art.
    art = ImageField()

    # MusicBrainz IDs.
    mb_trackid = MediaField(
                mp3 = StorageStyle('UFID:http://musicbrainz.org',
                                   list_elem = False,
                                   id3_frame_field = 'data'),
                mp4 = StorageStyle(
                        '----:com.apple.iTunes:MusicBrainz Track Id',
                        as_type=str),
                etc = StorageStyle('musicbrainz_trackid')
            )
    mb_albumid = MediaField(
                mp3 = StorageStyle('TXXX', id3_desc=u'MusicBrainz Album Id'),
                mp4 = StorageStyle(
                        '----:com.apple.iTunes:MusicBrainz Album Id',
                        as_type=str),
                etc = StorageStyle('musicbrainz_albumid')
            )
    mb_artistid = MediaField(
                mp3 = StorageStyle('TXXX', id3_desc=u'MusicBrainz Artist Id'),
                mp4 = StorageStyle(
                        '----:com.apple.iTunes:MusicBrainz Artist Id',
                        as_type=str),
                etc = StorageStyle('musicbrainz_artistid')
            )
    mb_albumartistid = MediaField(
                mp3 = StorageStyle('TXXX',
                                   id3_desc=u'MusicBrainz Album Artist Id'),
                mp4 = StorageStyle(
                        '----:com.apple.iTunes:MusicBrainz Album Artist Id',
                        as_type=str),
                etc = StorageStyle('musicbrainz_albumartistid')
            )

    @property
    def length(self):
        """The length reported by Mutagen's stream info for this file."""
        return self.mgfile.info.length

    @property
    def bitrate(self):
        """The bitrate reported by Mutagen, or an estimate computed from
        the on-disk file size and the length when Mutagen provides none.
        """
        if hasattr(self.mgfile.info, 'bitrate'):
            # Many formats provide it explicitly.
            return self.mgfile.info.bitrate
        else:
            # Otherwise, we calculate bitrate from the file size. (This
            # is the case for all of the lossless formats.)
            size = os.path.getsize(self.path)
            return int(size * 8 / self.length)

    @property
    def format(self):
        """The entry of the module-level TYPES table for this file's
        type code.
        """
        return TYPES[self.type]
+8 -5
View File
@@ -1,5 +1,5 @@
# This file is part of beets.
# Copyright 2010, Adrian Sampson.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
@@ -22,6 +22,9 @@ from collections import defaultdict
PLUGIN_NAMESPACE = 'beetsplug'
DEFAULT_PLUGINS = []
# Plugins using the Last.fm API can share the same API key.
LASTFM_KEY = '2dc3914abf35f0d9c92d97d8f8e42b43'
# Global logger.
log = logging.getLogger('beets')
@@ -52,14 +55,14 @@ class BeetsPlugin(object):
return 0.0, 0.0
def candidates(self, items):
"""Should return a sequence of MusicBrainz info dictionaries
that match the album whose items are provided.
"""Should return a sequence of AlbumInfo objects that match the
album whose items are provided.
"""
return ()
def item_candidates(self, item):
"""Should return a sequence of MusicBrainz track info
dictionaries that match the item provided.
"""Should return a sequence of TrackInfo objects that match the
item provided.
"""
return ()
+30 -14
View File
@@ -52,6 +52,21 @@ class UserError(Exception):
# Utilities.
def _encoding():
"""Tries to guess the encoding uses by the terminal."""
try:
return locale.getdefaultlocale()[1] or 'utf8'
except ValueError:
# Invalid locale environment variable setting. To avoid
# failing entirely for no good reason, assume UTF-8.
return 'utf8'
def decargs(arglist):
"""Given a list of command-line argument bytestrings, attempts to
decode them to Unicode strings.
"""
return [s.decode(_encoding()) for s in arglist]
def print_(*strings):
"""Like print, but rather than raising an error when a character
is not in the terminal's encoding's character set, just silently
@@ -65,13 +80,7 @@ def print_(*strings):
else:
txt = u''
if isinstance(txt, unicode):
try:
encoding = locale.getdefaultlocale()[1] or 'utf8'
except ValueError:
# Invalid locale environment variable setting. To avoid
# failing entirely for no good reason, assume UTF-8.
encoding = 'utf8'
txt = txt.encode(encoding, 'replace')
txt = txt.encode(_encoding(), 'replace')
print txt
def input_options(options, require=False, prompt=None, fallback_prompt=None,
@@ -247,10 +256,6 @@ def input_yn(prompt, require=False, color=False):
)
return sel == 'y'
def make_query(criteria):
"""Make query string for the list of criteria."""
return ' '.join(criteria).strip() or None
def config_val(config, section, name, default, vtype=None):
"""Queries the configuration file for a value (given by the
section and name). If no value is present, returns default.
@@ -326,9 +331,20 @@ def colorize(color, text):
return escape + text + RESET_COLOR
def colordiff(a, b, highlight='red'):
"""Given two strings, return the same pair of strings except with
their differences highlighted in the specified color.
"""Given two values, return the same pair of strings except with
their differences highlighted in the specified color. Strings are
highlighted intelligently to show differences; other values are
stringified and highlighted in their entirety.
"""
if not isinstance(a, basestring) or not isinstance(b, basestring):
# Non-strings: use ordinary equality.
a = unicode(a)
b = unicode(b)
if a == b:
return a, b
else:
return colorize(highlight, a), colorize(highlight, b)
a_out = []
b_out = []
@@ -351,7 +367,7 @@ def colordiff(a, b, highlight='red'):
else:
assert(False)
return ''.join(a_out), ''.join(b_out)
return u''.join(a_out), u''.join(b_out)
# Subcommand parsing infrastructure.
+632
View File
@@ -0,0 +1,632 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""This module contains all of the core logic for beets' command-line
interface. To invoke the CLI, just call beets.ui.main(). The actual
CLI commands are implemented in the ui.commands module.
"""
import os
import locale
import optparse
import textwrap
import ConfigParser
import sys
from difflib import SequenceMatcher
import logging
import sqlite3
import errno
from beets import library
from beets import plugins
from beets import util
# Constants.
# Name of the environment variable that, when set, gives the path of
# the configuration file to read instead of DEFAULT_CONFIG_FILE.
CONFIG_PATH_VAR = 'BEETSCONFIG'
# Configuration file read by main() when no file handle is passed in
# and CONFIG_PATH_VAR is not set.
DEFAULT_CONFIG_FILE = os.path.expanduser('~/.beetsconfig')
# Fallback library database path and music directory, used by main()
# when neither the command line nor the config file provides a value.
DEFAULT_LIBRARY = '~/.beetsmusic.blb'
DEFAULT_DIRECTORY = '~/Music'
# Fallback path format templates, used by main() when neither the -p
# option nor a legacy `path_format` setting is given.
DEFAULT_PATH_FORMATS = {
    'default': '$albumartist/$album/$track $title',
    'comp': 'Compilations/$album/$track $title',
    'singleton': 'Non-Album/$artist/$title',
}
# Fallback value for the `art_filename` configuration option.
DEFAULT_ART_FILENAME = 'cover'
# UI exception. Commands should throw this in order to display
# nonrecoverable errors to the user.
class UserError(Exception):
    """UI exception. Commands should raise this in order to display
    nonrecoverable errors to the user.
    """
# Utilities.
def _encoding():
"""Tries to guess the encoding uses by the terminal."""
try:
return locale.getdefaultlocale()[1] or 'utf8'
except ValueError:
# Invalid locale environment variable setting. To avoid
# failing entirely for no good reason, assume UTF-8.
return 'utf8'
def decargs(arglist):
    """Given a list of command-line argument bytestrings, attempts to
    decode them to Unicode strings using the terminal's encoding (as
    guessed by _encoding()). Returns a new list; decoding errors
    propagate to the caller.
    """
    # The encoding does not change between arguments, so look it up
    # once rather than once per element.
    encoding = _encoding()
    return [arg.decode(encoding) for arg in arglist]
def print_(*strings):
"""Like print, but rather than raising an error when a character
is not in the terminal's encoding's character set, just silently
replaces it.
"""
if strings:
if isinstance(strings[0], unicode):
txt = u' '.join(strings)
else:
txt = ' '.join(strings)
else:
txt = u''
if isinstance(txt, unicode):
txt = txt.encode(_encoding(), 'replace')
print txt
def input_options(options, require=False, prompt=None, fallback_prompt=None,
numrange=None, default=None, color=False, max_width=72):
"""Prompts a user for input. The sequence of `options` defines the
choices the user has. A single-letter shortcut is inferred for each
option; the user's choice is returned as that single, lower-case
letter. The options should be provided as lower-case strings unless
a particular shortcut is desired; in that case, only that letter
should be capitalized.
By default, the first option is the default. If `require` is
provided, then there is no default. `default` can be provided to
override this. The prompt and fallback prompt are also inferred but
can be overridden.
If numrange is provided, it is a pair of `(high, low)` (both ints)
indicating that, in addition to `options`, the user may enter an
integer in that inclusive range.
`max_width` specifies the maximum number of columns in the
automatically generated prompt string.
"""
# Assign single letters to each option. Also capitalize the options
# to indicate the letter.
letters = {}
display_letters = []
capitalized = []
first = True
for option in options:
# Is a letter already capitalized?
for letter in option:
if letter.isalpha() and letter.upper() == letter:
found_letter = letter
break
else:
# Infer a letter.
for letter in option:
if not letter.isalpha():
continue # Don't use punctuation.
if letter not in letters:
found_letter = letter
break
else:
raise ValueError('no unambiguous lettering found')
letters[found_letter.lower()] = option
index = option.index(found_letter)
# Mark the option's shortcut letter for display.
if (default is None and not numrange and first) \
or (isinstance(default, basestring) and
found_letter.lower() == default.lower()):
# The first option is the default; mark it.
show_letter = '[%s]' % found_letter.upper()
is_default = True
else:
show_letter = found_letter.upper()
is_default = False
# Possibly colorize the letter shortcut.
if color:
color = 'turquoise' if is_default else 'blue'
show_letter = colorize(color, show_letter)
# Insert the highlighted letter back into the word.
capitalized.append(
option[:index] + show_letter + option[index+1:]
)
display_letters.append(found_letter.upper())
first = False
# The default is just the first option if unspecified.
if default is None:
if require:
default = None
elif numrange:
default = numrange[0]
else:
default = display_letters[0].lower()
# Make a prompt if one is not provided.
if not prompt:
prompt_parts = []
prompt_part_lengths = []
if numrange:
if isinstance(default, int):
default_name = str(default)
if color:
default_name = colorize('turquoise', default_name)
tmpl = '# selection (default %s)'
prompt_parts.append(tmpl % default_name)
prompt_part_lengths.append(len(tmpl % str(default)))
else:
prompt_parts.append('# selection')
prompt_part_lengths.append(prompt_parts[-1])
prompt_parts += capitalized
prompt_part_lengths += [len(s) for s in options]
# Wrap the query text.
prompt = ''
line_length = 0
for i, (part, length) in enumerate(zip(prompt_parts,
prompt_part_lengths)):
# Add punctuation.
if i == len(prompt_parts) - 1:
part += '?'
else:
part += ','
length += 1
# Choose either the current line or the beginning of the next.
if line_length + length + 1 > max_width:
prompt += '\n'
line_length = 0
if line_length != 0:
# Not the beginning of the line; need a space.
part = ' ' + part
length += 1
prompt += part
line_length += length
# Make a fallback prompt too. This is displayed if the user enters
# something that is not recognized.
if not fallback_prompt:
fallback_prompt = 'Enter one of '
if numrange:
fallback_prompt += '%i-%i, ' % numrange
fallback_prompt += ', '.join(display_letters) + ':'
# (raw_input(prompt) was causing problems with colors.)
print prompt,
resp = raw_input()
while True:
resp = resp.strip().lower()
# Try default option.
if default is not None and not resp:
resp = default
# Try an integer input if available.
if numrange:
try:
resp = int(resp)
except ValueError:
pass
else:
low, high = numrange
if low <= resp <= high:
return resp
else:
resp = None
# Try a normal letter input.
if resp:
resp = resp[0]
if resp in letters:
return resp
# Prompt for new input.
print fallback_prompt,
resp = raw_input()
def input_yn(prompt, require=False, color=False):
    """Asks the user a yes/no question using `prompt` and returns True
    for "yes" and False for "no". "Yes" is the default answer unless
    `require` is `True`, in which case there is no default.
    """
    choice = input_options(('y', 'n'), require, prompt, 'Enter Y or N:',
                           color=color)
    return choice == 'y'
def config_val(config, section, name, default, vtype=None):
    """Looks up the option `name` in `section` of the configuration
    and returns its value, or `default` when the option is absent.
    The section is created in `config` if it does not exist yet.
    vtype optionally specifies the return type (although only bool
    is supported for now).
    """
    if not config.has_section(section):
        config.add_section(section)

    # Booleans need the parsing getter; everything else is raw text.
    getter = config.getboolean if vtype is bool else config.get
    try:
        return getter(section, name)
    except ConfigParser.NoOptionError:
        return default
def human_bytes(size):
    """Renders `size`, a number of bytes, as a short human-readable
    string such as "1.5 KB". Values too large for the known suffixes
    are rendered as "big".
    """
    value = size
    for unit in ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB', 'HB'):
        if value < 1024:
            return "%3.1f %s" % (value, unit)
        # Too big for this unit; move on to the next one.
        value = value / 1024.0
    return "big"
def human_seconds(interval):
    """Renders `interval`, a number of seconds, as a human-readable
    duration in the largest unit that fits, e.g. "1.5 minutes".
    """
    # Each entry is (how many of the previous unit make one, name).
    units = [
        (1, 'second'),
        (60, 'minute'),
        (60, 'hour'),
        (24, 'day'),
        (7, 'week'),
        (52, 'year'),
        (10, 'decade'),
    ]
    # Walk each unit together with its successor, stopping as soon as
    # the value is smaller than one of the next unit.
    for (increment, suffix), (next_increment, _) in zip(units, units[1:]):
        interval /= float(increment)
        if interval < next_increment:
            break
    else:
        # Last unit.
        increment, suffix = units[-1]
        interval /= float(increment)

    return "%3.1f %ss" % (interval, suffix)
# ANSI terminal colorization code heavily inspired by pygments:
# http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py
# (pygments is by Tim Hatch, Armin Ronacher, et al.)
# Prefix of every ANSI color escape sequence.
COLOR_ESCAPE = "\x1b["
# Color names; a name's position in its list selects its ANSI code
# (index + 30). Light colors additionally get the bold attribute.
DARK_COLORS  = ["black", "darkred", "darkgreen", "brown", "darkblue",
                "purple", "teal", "lightgray"]
LIGHT_COLORS = ["darkgray", "red", "green", "yellow", "blue",
                "fuchsia", "turquoise", "white"]
# Sequence that restores the default foreground/background.
RESET_COLOR = COLOR_ESCAPE + "39;49;00m"
def colorize(color, text):
    """Returns a string that prints the given text in the given color
    in a terminal that is ANSI color-aware. The color must be something
    in DARK_COLORS or LIGHT_COLORS; any other name raises ValueError.
    """
    if color in DARK_COLORS:
        escape = COLOR_ESCAPE + "%im" % (DARK_COLORS.index(color) + 30)
    elif color in LIGHT_COLORS:
        escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS.index(color) + 30)
    else:
        # Interpolate the name into the message (it used to be passed
        # as a second exception argument and never formatted).
        raise ValueError('no such color %s' % color)
    return escape + text + RESET_COLOR
def colordiff(a, b, highlight='red'):
    """Returns a pair of strings corresponding to `a` and `b` in which
    the parts that differ are wrapped in the `highlight` color. When
    both values are strings, only the differing runs of characters are
    highlighted; otherwise both values are stringified and, if they
    differ, highlighted in their entirety.
    """
    if not (isinstance(a, basestring) and isinstance(b, basestring)):
        # Non-strings: use ordinary equality.
        left, right = unicode(a), unicode(b)
        if left == right:
            return left, right
        return colorize(highlight, left), colorize(highlight, right)

    left_parts = []
    right_parts = []

    matcher = SequenceMatcher(lambda x: False, a, b)
    for op, a_start, a_end, b_start, b_end in matcher.get_opcodes():
        a_chunk = a[a_start:a_end]
        b_chunk = b[b_start:b_end]
        if op == 'equal':
            # In both strings.
            left_parts.append(a_chunk)
            right_parts.append(b_chunk)
        elif op == 'insert':
            # Right only.
            right_parts.append(colorize(highlight, b_chunk))
        elif op == 'delete':
            # Left only.
            left_parts.append(colorize(highlight, a_chunk))
        elif op == 'replace':
            # Right and left differ.
            left_parts.append(colorize(highlight, a_chunk))
            right_parts.append(colorize(highlight, b_chunk))
        else:
            assert(False)

    return u''.join(left_parts), u''.join(right_parts)
# Subcommand parsing infrastructure.
# This is a fairly generic subcommand parser for optparse. It is
# maintained externally here:
# http://gist.github.com/462717
# There you will also find a better description of the code and a more
# succinct example program.
class Subcommand(object):
    """A subcommand of a root command-line application that may be
    invoked by a SubcommandOptionParser.
    """
    def __init__(self, name, parser=None, help='', aliases=()):
        """Creates a new subcommand. name is the primary way to invoke
        the subcommand; aliases are alternate names. parser is an
        OptionParser responsible for parsing the subcommand's options.
        help is a short description of the command. If no parser is
        given, it defaults to a new, empty OptionParser.
        """
        self.name = name
        self.parser = parser or optparse.OptionParser()
        self.aliases = aliases
        self.help = help

class SubcommandsOptionParser(optparse.OptionParser):
    """A variant of OptionParser that parses subcommands and their
    arguments.
    """

    # A singleton command used to give help on other subcommands.
    _HelpSubcommand = Subcommand('help', optparse.OptionParser(),
        help='give detailed help on a specific sub-command',
        aliases=('?',))

    def __init__(self, *args, **kwargs):
        """Create a new subcommand-aware option parser. All of the
        options to OptionParser.__init__ are supported in addition
        to subcommands, a sequence of Subcommand objects.
        """
        # The subcommand array, with the help command included.
        self.subcommands = list(kwargs.pop('subcommands', []))
        self.subcommands.append(self._HelpSubcommand)

        # A more helpful default usage.
        if 'usage' not in kwargs:
            kwargs['usage'] = """
%prog COMMAND [ARGS...]
%prog help COMMAND"""

        # Super constructor.
        optparse.OptionParser.__init__(self, *args, **kwargs)

        # Adjust the help-visible name of each subcommand.
        for subcommand in self.subcommands:
            subcommand.parser.prog = '%s %s' % \
                    (self.get_prog_name(), subcommand.name)

        # Our root parser needs to stop on the first unrecognized argument.
        self.disable_interspersed_args()

    def add_subcommand(self, cmd):
        """Adds a Subcommand object to the parser's list of commands.
        """
        self.subcommands.append(cmd)

    # Add the list of subcommands to the help message.
    def format_help(self, formatter=None):
        """Return the standard OptionParser help text followed by a
        "Commands" section listing every subcommand (with its aliases)
        and its help string.
        """
        # Get the original help message, to which we will append.
        out = optparse.OptionParser.format_help(self, formatter)
        if formatter is None:
            formatter = self.formatter

        # Subcommands header.
        result = ["\n"]
        result.append(formatter.format_heading('Commands'))
        formatter.indent()

        # Generate the display names (including aliases).
        # Also determine the help position.
        disp_names = []
        help_position = 0
        for subcommand in self.subcommands:
            name = subcommand.name
            if subcommand.aliases:
                name += ' (%s)' % ', '.join(subcommand.aliases)
            disp_names.append(name)

            # Set the help position based on the max width.
            proposed_help_position = len(name) + formatter.current_indent + 2
            if proposed_help_position <= formatter.max_help_position:
                help_position = max(help_position, proposed_help_position)

        # Add each subcommand to the output.
        for subcommand, name in zip(self.subcommands, disp_names):
            # Lifted directly from optparse.py.
            name_width = help_position - formatter.current_indent - 2
            if len(name) > name_width:
                name = "%*s%s\n" % (formatter.current_indent, "", name)
                indent_first = help_position
            else:
                name = "%*s%-*s  " % (formatter.current_indent, "",
                                      name_width, name)
                indent_first = 0
            result.append(name)
            help_width = formatter.width - help_position
            # textwrap.wrap() returns an empty list for empty text; a
            # subcommand without help still needs one (blank) line.
            help_lines = textwrap.wrap(subcommand.help, help_width) or ['']
            result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
            result.extend(["%*s%s\n" % (help_position, "", line)
                           for line in help_lines[1:]])
        formatter.dedent()

        # Concatenate the original help message with the subcommand
        # list.
        return out + "".join(result)

    def _subcommand_for_name(self, name):
        """Return the subcommand in self.subcommands matching the
        given name. The name may either be the name of a subcommand or
        an alias. If no subcommand matches, returns None.
        """
        for subcommand in self.subcommands:
            if name == subcommand.name or \
               name in subcommand.aliases:
                return subcommand
        return None

    def parse_args(self, a=None, v=None):
        """Like OptionParser.parse_args, but returns these four items:
        - options: the options passed to the root parser
        - subcommand: the Subcommand object that was invoked
        - suboptions: the options passed to the subcommand parser
        - subargs: the positional arguments passed to the subcommand

        Prints help and exits when no command is given or when the
        help command is invoked; reports an error and exits when a
        command name is unknown.
        """
        options, args = optparse.OptionParser.parse_args(self, a, v)

        if not args:
            # No command given.
            self.print_help()
            self.exit()
        else:
            cmdname = args.pop(0)
            subcommand = self._subcommand_for_name(cmdname)
            if not subcommand:
                self.error('unknown command ' + cmdname)

        suboptions, subargs = subcommand.parser.parse_args(args)

        if subcommand is self._HelpSubcommand:
            if subargs:
                # particular
                cmdname = subargs[0]
                helpcommand = self._subcommand_for_name(cmdname)
                if not helpcommand:
                    # Asking for help on a command that doesn't exist
                    # is a usage error, not a crash.
                    self.error('unknown command ' + cmdname)
                helpcommand.parser.print_help()
                self.exit()
            else:
                # general
                self.print_help()
                self.exit()

        return options, subcommand, suboptions, subargs
# The root parser and its main function.
def main(args=None, configfh=None):
"""Run the main command-line interface for beets."""
# Get the default subcommands.
from beets.ui.commands import default_commands
# Read defaults from config file.
config = ConfigParser.SafeConfigParser()
if configfh:
configpath = None
elif CONFIG_PATH_VAR in os.environ:
configpath = os.path.expanduser(os.environ[CONFIG_PATH_VAR])
else:
configpath = DEFAULT_CONFIG_FILE
if configpath:
configpath = util.syspath(configpath)
if os.path.exists(util.syspath(configpath)):
configfh = open(configpath)
else:
configfh = None
if configfh:
config.readfp(configfh)
# Add plugin paths.
plugpaths = config_val(config, 'beets', 'pluginpath', '')
for plugpath in plugpaths.split(':'):
sys.path.append(os.path.expanduser(plugpath))
# Load requested plugins.
plugnames = config_val(config, 'beets', 'plugins', '')
plugins.load_plugins(plugnames.split())
plugins.load_listeners()
plugins.send("pluginload")
plugins.configure(config)
# Construct the root parser.
commands = list(default_commands)
commands += plugins.commands()
parser = SubcommandsOptionParser(subcommands=commands)
parser.add_option('-l', '--library', dest='libpath',
help='library database file to use')
parser.add_option('-d', '--directory', dest='directory',
help="destination music directory")
parser.add_option('-p', '--pathformat', dest='path_format',
help="destination path format string")
parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
help='print debugging information')
# Parse the command-line!
options, subcommand, suboptions, subargs = parser.parse_args(args)
# Open library file.
libpath = options.libpath or \
config_val(config, 'beets', 'library', DEFAULT_LIBRARY)
directory = options.directory or \
config_val(config, 'beets', 'directory', DEFAULT_DIRECTORY)
legacy_path_format = config_val(config, 'beets', 'path_format', None)
if options.path_format:
# If given, -p overrides all path format settings
path_formats = {'default': options.path_format}
else:
if legacy_path_format:
# Old path formats override the default values.
path_formats = {'default': legacy_path_format}
else:
# If no legacy path format, use the defaults instead.
path_formats = DEFAULT_PATH_FORMATS
if config.has_section('paths'):
path_formats.update(config.items('paths'))
art_filename = \
config_val(config, 'beets', 'art_filename', DEFAULT_ART_FILENAME)
db_path = os.path.expanduser(libpath)
try:
lib = library.Library(db_path,
directory,
path_formats,
art_filename)
except sqlite3.OperationalError:
raise UserError("database file %s could not be opened" % db_path)
# Configure the logger.
log = logging.getLogger('beets')
if options.verbose:
log.setLevel(logging.DEBUG)
else:
log.setLevel(logging.INFO)
# Invoke the subcommand.
try:
subcommand.func(lib, config, suboptions, subargs)
except UserError, exc:
message = exc.args[0] if exc.args else None
subcommand.parser.error(message)
except IOError, exc:
if exc.errno == errno.EPIPE:
# "Broken pipe". End silently.
pass
else:
raise
+358 -44
View File
@@ -20,14 +20,17 @@ import logging
import sys
import os
import time
import itertools
import re
from lib.beets import ui
from lib.beets.ui import print_
from lib.beets.ui import print_, decargs
from lib.beets import autotag
import lib.beets.autotag.art as beets.autotag.art
import lib.beets.autotag.art
from lib.beets import plugins
from lib.beets import importer
from lib.beets.util import syspath, normpath
from lib.beets.util import syspath, normpath, ancestry
from lib.beets import library
# Global logger.
log = logging.getLogger('beets')
@@ -36,6 +39,49 @@ log = logging.getLogger('beets')
# objects that can be fed to a SubcommandsOptionParser.
default_commands = []
# Utility.
def _do_query(lib, query, album, also_items=True):
"""For commands that operate on matched items, performs a query
and returns a list of matching items and a list of matching
albums. (The latter is only nonempty when album is True.) Raises
a UserError if no items match. also_items controls whether, when
fetching albums, the associated items should be fetched also.
"""
if album:
albums = list(lib.albums(query))
items = []
if also_items:
for al in albums:
items += al.items()
else:
albums = []
items = list(lib.items(query))
if album and not albums:
raise ui.UserError('No matching albums found.')
elif not album and not items:
raise ui.UserError('No matching items found.')
return items, albums
# Two floats closer together than this are treated as equal.
FLOAT_EPSILON = 0.01
def _showdiff(field, oldval, newval, color):
    """Prints a line of the form "  field: old -> new" when the two
    values differ, optionally highlighting the difference in color.
    Prints nothing when the values are equal.
    """
    # Floats can't be compared for perfect equality, so a pair of
    # floats within the epsilon tolerance counts as unchanged.
    both_floats = isinstance(oldval, float) and isinstance(newval, float)
    if both_floats and abs(oldval - newval) < FLOAT_EPSILON:
        return

    if newval != oldval:
        if not color:
            oldval, newval = unicode(oldval), unicode(newval)
        else:
            oldval, newval = ui.colordiff(oldval, newval)
        print_(u'  %s: %s -> %s' % (field, oldval, newval))
# import: Autotagger and importer.
@@ -48,6 +94,7 @@ DEFAULT_IMPORT_ART = True
DEFAULT_IMPORT_QUIET = False
DEFAULT_IMPORT_QUIET_FALLBACK = 'skip'
DEFAULT_IMPORT_RESUME = None # "ask"
DEFAULT_IMPORT_INCREMENTAL = False
DEFAULT_THREADED = True
DEFAULT_COLOR = True
@@ -83,10 +130,10 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True):
print_(' (unknown album)')
# Identify the album in question.
if cur_artist != info['artist'] or \
(cur_album != info['album'] and info['album'] != VARIOUS_ARTISTS):
artist_l, artist_r = cur_artist or '', info['artist']
album_l, album_r = cur_album or '', info['album']
if cur_artist != info.artist or \
(cur_album != info.album and info.album != VARIOUS_ARTISTS):
artist_l, artist_r = cur_artist or '', info.artist
album_l, album_r = cur_album or '', info.album
if artist_r == VARIOUS_ARTISTS:
# Hide artists for VA releases.
artist_l, artist_r = u'', u''
@@ -100,17 +147,17 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True):
print_("To:")
show_album(artist_r, album_r)
else:
print_("Tagging: %s - %s" % (info['artist'], info['album']))
print_("Tagging: %s - %s" % (info.artist, info.album))
# Distance/similarity.
print_('(Similarity: %s)' % dist_string(dist, color))
# Tracks.
for i, (item, track_data) in enumerate(zip(items, info['tracks'])):
for i, (item, track_info) in enumerate(zip(items, info.tracks)):
cur_track = str(item.track)
new_track = str(i+1)
cur_title = item.title
new_title = track_data['title']
new_title = track_info.title
# Possibly colorize changes.
if color:
@@ -118,6 +165,10 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True):
if cur_track != new_track:
cur_track = ui.colorize('red', cur_track)
new_track = ui.colorize('red', new_track)
# Show filename (non-colorized) when title is not set.
if not item.title.strip():
cur_title = os.path.basename(item.path)
if cur_title != new_title and cur_track != new_track:
print_(" * %s (%s) -> %s (%s)" % (
@@ -132,8 +183,8 @@ def show_item_change(item, info, dist, color):
"""Print out the change that would occur by tagging `item` with the
metadata from `info`.
"""
cur_artist, new_artist = item.artist, info['artist']
cur_title, new_title = item.title, info['title']
cur_artist, new_artist = item.artist, info.artist
cur_title, new_title = item.title, info.title
if cur_artist != new_artist or cur_title != new_title:
if color:
@@ -177,7 +228,7 @@ def choose_candidate(candidates, singleton, rec, color, timid,
Returns the result of the choice, which may SKIP, ASIS, TRACKS, or
MANUAL or a candidate. For albums, a candidate is a `(info, items)`
pair; for items, it is just an `info` dictionary.
pair; for items, it is just a TrackInfo object.
"""
# Sanity check.
if singleton:
@@ -237,8 +288,24 @@ def choose_candidate(candidates, singleton, rec, color, timid,
(cur_artist, cur_album))
print_('Candidates:')
for i, (dist, items, info) in enumerate(candidates):
print_('%i. %s - %s (%s)' % (i+1, info['artist'],
info['album'], dist_string(dist, color)))
line = '%i. %s - %s' % (i+1, info['artist'],
info['album'])
# Label and year disambiguation, if available.
label, year = None, None
if 'label' in info:
label = info['label']
if 'year' in info and info['year']:
year = unicode(info['year'])
if label and year:
line += u' [%s, %s]' % (label, year)
elif label:
line += u' [%s]' % label
elif year:
line += u' [%s]' % year
line += ' (%s)' % dist_string(dist, color)
print_(line)
# Ask the user for a choice.
if singleton:
@@ -321,10 +388,20 @@ def manual_search(singleton):
return artist.strip(), name.strip()
def manual_id(singleton):
"""Input a MusicBrainz ID, either for an album or a track.
"""Input a MusicBrainz ID, either for an album ("release") or a
track ("recording"). If no valid ID is entered, returns None.
"""
prompt = 'Enter MusicBrainz %s ID: ' % ('track' if singleton else 'album')
return raw_input(prompt).decode(sys.stdin.encoding).strip()
prompt = 'Enter MusicBrainz %s ID: ' % \
('recording' if singleton else 'release')
entry = raw_input(prompt).decode(sys.stdin.encoding).strip()
# Find the first thing that looks like a UUID/MBID.
match = re.search('[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', entry)
if match:
return match.group()
else:
log.error('Invalid MBID.')
return None
def choose_match(task, config):
"""Given an initial autotagging of items, go through an interactive
@@ -370,12 +447,13 @@ def choose_match(task, config):
elif choice is importer.action.MANUAL_ID:
# Try a manually-entered ID.
search_id = manual_id(False)
try:
_, _, candidates, rec = \
autotag.tag_album(task.items, config.timid,
search_id=search_id)
except autotag.AutotagError:
candidates, rec = None, None
if search_id:
try:
_, _, candidates, rec = \
autotag.tag_album(task.items, config.timid,
search_id=search_id)
except autotag.AutotagError:
candidates, rec = None, None
else:
# We have a candidate! Finish tagging. Here, choice is
# an (info, items) pair as desired.
@@ -384,7 +462,7 @@ def choose_match(task, config):
def choose_item(task, config):
"""Ask the user for a choice about tagging a single item. Returns
either an action constant or a track info dictionary.
either an action constant or a TrackInfo object.
"""
print_()
print_(task.item.path)
@@ -416,8 +494,9 @@ def choose_item(task, config):
elif choice == importer.action.MANUAL_ID:
# Ask for a track ID.
search_id = manual_id(True)
candidates, rec = autotag.tag_item(task.item, config.timid,
search_id=search_id)
if search_id:
candidates, rec = autotag.tag_item(task.item, config.timid,
search_id=search_id)
else:
# Chose a candidate.
assert not isinstance(choice, importer.action)
@@ -427,7 +506,7 @@ def choose_item(task, config):
def import_files(lib, paths, copy, write, autot, logpath, art, threaded,
color, delete, quiet, resume, quiet_fallback, singletons,
timid):
timid, query, incremental):
"""Import the files in the given list of paths, tagging each leaf
directory as an album. If copy, then the files are copied into
the library folder. If write, then new metadata is written to the
@@ -487,6 +566,8 @@ def import_files(lib, paths, copy, write, autot, logpath, art, threaded,
singletons = singletons,
timid = timid,
choose_item_func = choose_item,
query = query,
incremental = incremental,
)
# If we were logging, close the file.
@@ -528,6 +609,10 @@ import_cmd.parser.add_option('-s', '--singletons', action='store_true',
help='import individual tracks instead of full albums')
import_cmd.parser.add_option('-t', '--timid', dest='timid',
action='store_true', help='always confirm all actions')
import_cmd.parser.add_option('-L', '--library', dest='library',
action='store_true', help='retag items matching a query')
import_cmd.parser.add_option('-i', '--incremental', dest='incremental',
action='store_true', help='skip already-imported directories')
def import_func(lib, config, opts, args):
copy = opts.copy if opts.copy is not None else \
ui.config_val(config, 'beets', 'import_copy',
@@ -553,6 +638,9 @@ def import_func(lib, config, opts, args):
DEFAULT_IMPORT_TIMID, bool)
logpath = opts.logpath if opts.logpath is not None else \
ui.config_val(config, 'beets', 'import_log', None)
incremental = opts.incremental if opts.incremental is not None else \
ui.config_val(config, 'beets', 'import_incremental',
DEFAULT_IMPORT_INCREMENTAL, bool)
# Resume has three options: yes, no, and "ask" (None).
resume = opts.resume if opts.resume is not None else \
@@ -569,9 +657,17 @@ def import_func(lib, config, opts, args):
quiet_fallback = importer.action.ASIS
else:
quiet_fallback = importer.action.SKIP
import_files(lib, args, copy, write, autot, logpath, art, threaded,
if opts.library:
query = args
paths = []
else:
query = None
paths = args
import_files(lib, paths, copy, write, autot, logpath, art, threaded,
color, delete, quiet, resume, quiet_fallback, singletons,
timid)
timid, query, incremental)
import_cmd.func = import_func
default_commands.append(import_cmd)
@@ -602,11 +698,104 @@ list_cmd.parser.add_option('-a', '--album', action='store_true',
list_cmd.parser.add_option('-p', '--path', action='store_true',
help='print paths for matched items or albums')
def list_func(lib, config, opts, args):
list_items(lib, ui.make_query(args), opts.album, opts.path)
list_items(lib, decargs(args), opts.album, opts.path)
list_cmd.func = list_func
default_commands.append(list_cmd)
# update: Update library contents according to on-disk tags.
def update_items(lib, query, album, move, color, pretend):
    """For all the items matched by the query, update the library to
    reflect the item's embedded tags.

    `album` is passed to `_do_query` to select album-level matching;
    the items of the matched albums are then processed. Items whose
    file no longer exists on disk are reported with an "X" line and
    removed from the library. For every other item the tags are
    re-read and each changed metadata field is printed via
    `_showdiff` (colorized when `color` is true). When `move` is true,
    changed items and affected albums located under `lib.directory`
    are moved. When `pretend` is true, changes are only printed: no
    item is removed, moved or stored, and albums are left untouched.
    """
    items, _ = _do_query(lib, query, album)

    # Walk through the items and pick up their changes.
    affected_albums = set()
    for item in items:
        # Item deleted?
        if not os.path.exists(syspath(item.path)):
            print_(u'X %s - %s' % (item.artist, item.title))
            if not pretend:
                # NOTE(review): the second argument presumably requests
                # deletion of the (already missing) file -- confirm
                # against Library.remove.
                lib.remove(item, True)
            affected_albums.add(item.album_id)
            continue

        # Read new data. A snapshot of the current record is taken
        # first so old and new values can be compared afterwards.
        old_data = dict(item.record)
        item.read()

        # Special-case album artist when it matches track artist. (Hacky
        # but necessary for preserving album-level metadata for non-
        # autotagged imports.)
        if not item.albumartist and \
                old_data['albumartist'] == old_data['artist'] == item.artist:
            item.albumartist = old_data['albumartist']
            item.dirty['albumartist'] = False

        # Get and save metadata changes. Each entry maps a field name
        # to an (old value, new value) pair.
        changes = {}
        for key in library.ITEM_KEYS_META:
            if item.dirty[key]:
                changes[key] = old_data[key], getattr(item, key)
        if changes:
            # Something changed.
            print_(u'* %s - %s' % (item.artist, item.title))
            for key, (oldval, newval) in changes.iteritems():
                _showdiff(key, oldval, newval, color)

            # If we're just pretending, then don't move or save.
            if pretend:
                continue

            # Move the item if it's in the library.
            if move and lib.directory in ancestry(item.path):
                lib.move(item)

            lib.store(item)
            affected_albums.add(item.album_id)

    # Skip album changes while pretending.
    if pretend:
        return

    # Modify affected albums to reflect changes in their items.
    for album_id in affected_albums:
        if album_id is None: # Singletons.
            continue
        album = lib.get_album(album_id)
        if not album: # Empty albums have already been removed.
            log.debug('emptied album %i' % album_id)
            continue
        al_items = list(album.items())

        # Update album structure to reflect an item in it. The first
        # item is used as the representative for album-level fields.
        for key in library.ALBUM_KEYS_ITEM:
            setattr(album, key, getattr(al_items[0], key))

        # Move album art (and any inconsistent items).
        if move and lib.directory in ancestry(al_items[0].path):
            log.debug('moving album %i' % album_id)
            album.move()

    lib.save()
update_cmd = ui.Subcommand('update',
help='update the library', aliases=('upd','up',))
update_cmd.parser.add_option('-a', '--album', action='store_true',
help='show matching albums instead of tracks')
update_cmd.parser.add_option('-M', '--nomove', action='store_false',
default=True, dest='move', help="don't move files in library")
update_cmd.parser.add_option('-p', '--pretend', action='store_true',
help="show all changes but do nothing")
def update_func(lib, config, opts, args):
    """Entry point for the `update` subcommand: read the color setting
    from the configuration and hand the decoded query to update_items.
    """
    use_color = ui.config_val(config, 'beets', 'color', DEFAULT_COLOR, bool)
    query = decargs(args)
    update_items(lib, query, opts.album, opts.move, use_color, opts.pretend)
update_cmd.func = update_func
default_commands.append(update_cmd)
# remove: Remove items from library, delete files.
def remove_items(lib, query, album, delete=False):
@@ -614,17 +803,7 @@ def remove_items(lib, query, album, delete=False):
remove whole albums. If delete, also remove files from disk.
"""
# Get the matching items.
if album:
albums = list(lib.albums(query))
items = []
for al in albums:
items += al.items()
else:
items = list(lib.items(query))
if not items:
print_('No matching items found.')
return
items, albums = _do_query(lib, query, album)
# Show all the items.
for item in items:
@@ -657,7 +836,7 @@ remove_cmd.parser.add_option("-d", "--delete", action="store_true",
remove_cmd.parser.add_option('-a', '--album', action='store_true',
help='match albums instead of tracks')
def remove_func(lib, config, opts, args):
remove_items(lib, ui.make_query(args), opts.album, opts.delete)
remove_items(lib, decargs(args), opts.album, opts.delete)
remove_cmd.func = remove_func
default_commands.append(remove_cmd)
@@ -698,7 +877,7 @@ Albums: %i""" % (
stats_cmd = ui.Subcommand('stats',
help='show statistics about the library or a query')
def stats_func(lib, config, opts, args):
show_stats(lib, ui.make_query(args))
show_stats(lib, decargs(args))
stats_cmd.func = stats_func
default_commands.append(stats_cmd)
@@ -720,3 +899,138 @@ version_cmd = ui.Subcommand('version',
help='output version information')
version_cmd.func = show_version
default_commands.append(version_cmd)
# modify: Declaratively change metadata.
def modify_items(lib, mods, query, write, move, album, color, confirm):
    """Modifies matching items according to key=value assignments.

    `mods` is a list of "key=value" strings; every key must be one of
    the album fields (when `album` is true) or the writable item
    fields (otherwise), or a UserError is raised. The objects matched
    by `query` are listed together with a preview of each change
    (colorized when `color` is true). If `confirm` is true the user is
    asked before anything is changed. The new values are then set on
    each object; when `move` is true, objects located under
    `lib.directory` are moved as well. When `write` is true, the
    affected items' `write()` method is called afterwards.
    """
    # Parse key=value specifications into a dictionary.
    allowed_keys = library.ALBUM_KEYS if album else library.ITEM_KEYS_WRITABLE
    fsets = {}
    for mod in mods:
        # Split on the first '=' only, so values may contain '='.
        key, value = mod.split('=', 1)
        if key not in allowed_keys:
            raise ui.UserError('"%s" is not a valid field' % key)
        fsets[key] = value

    # Get the items to modify. Album items are not fetched here
    # (also_items=False); they are only needed for the tag-writing
    # step at the end.
    items, albums = _do_query(lib, query, album, False)
    objs = albums if album else items

    # Preview change.
    print_('Modifying %i %ss.' % (len(objs), 'album' if album else 'item'))
    for obj in objs:
        # Identify the changed object.
        if album:
            print_(u'* %s - %s' % (obj.albumartist, obj.album))
        else:
            print_(u'* %s - %s' % (obj.artist, obj.title))

        # Show each change.
        for field, value in fsets.iteritems():
            curval = getattr(obj, field)
            _showdiff(field, curval, value, color)

    # Confirm.
    if confirm:
        extra = ' and write tags' if write else ''
        if not ui.input_yn('Really modify%s (Y/n)?' % extra):
            return

    # Apply changes to database.
    for obj in objs:
        for field, value in fsets.iteritems():
            setattr(obj, field, value)

        if move:
            cur_path = obj.item_dir() if album else obj.path
            if lib.directory in ancestry(cur_path): # In library?
                log.debug('moving object %s' % cur_path)
                if album:
                    obj.move()
                else:
                    lib.move(obj)

        # When modifying items, we have to store them to the database.
        if not album:
            lib.store(obj)
    lib.save()

    # Apply tags if requested.
    if write:
        if album:
            # In album mode `items` is still empty, so gather the items
            # of every modified album now.
            items = itertools.chain(*(a.items() for a in albums))
        for item in items:
            item.write()
modify_cmd = ui.Subcommand('modify',
help='change metadata fields', aliases=('mod',))
modify_cmd.parser.add_option('-M', '--nomove', action='store_false',
default=True, dest='move', help="don't move files in library")
modify_cmd.parser.add_option('-w', '--write', action='store_true',
default=None, help="write new metadata to files' tags (default)")
modify_cmd.parser.add_option('-W', '--nowrite', action='store_false',
dest='write', help="don't write metadata (opposite of -w)")
modify_cmd.parser.add_option('-a', '--album', action='store_true',
help='modify whole albums instead of tracks')
modify_cmd.parser.add_option('-y', '--yes', action='store_true',
help='skip confirmation')
def modify_func(lib, config, opts, args):
    """Entry point for the `modify` subcommand. Arguments containing
    "=" are treated as field assignments; all others form the query.
    Raises a UserError when no assignment is given.
    """
    mods = []
    query = []
    for arg in decargs(args):
        if '=' in arg:
            mods.append(arg)
        else:
            query.append(arg)
    if not mods:
        raise ui.UserError('no modifications specified')

    # The command-line flag wins; otherwise fall back to the
    # configured import_write setting.
    if opts.write is not None:
        write = opts.write
    else:
        write = ui.config_val(config, 'beets', 'import_write',
                              DEFAULT_IMPORT_WRITE, bool)
    color = ui.config_val(config, 'beets', 'color', DEFAULT_COLOR, bool)

    confirm = not opts.yes
    modify_items(lib, mods, query, write, opts.move, opts.album, color,
                 confirm)
modify_cmd.func = modify_func
default_commands.append(modify_cmd)
# move: Move/copy files to the library or a new base directory.
def move_items(lib, dest, query, copy, album):
    """Moves or copies items to a new base directory, given by dest. If
    dest is None, then the library's base directory is used, making the
    command "consolidate" files.

    `query` selects the objects to act on; when `album` is true whole
    albums are matched and moved through their own `move` method,
    otherwise individual items are moved through the library and
    stored. When `copy` is true the files are copied instead of moved.
    Raises a UserError (from `_do_query`) when nothing matches.
    """
    items, albums = _do_query(lib, query, album, False)
    objs = albums if album else items

    action = 'Copying' if copy else 'Moving'
    entity = 'album' if album else 'item'
    # Use the module's 'beets' logger rather than the root logger so
    # these messages follow the same configuration as every other
    # command in this file.
    log.info('%s %i %ss.' % (action, len(objs), entity))
    for obj in objs:
        old_path = obj.item_dir() if album else obj.path
        log.debug('moving: %s' % old_path)

        if album:
            obj.move(copy, basedir=dest)
        else:
            lib.move(obj, copy, basedir=dest)
            lib.store(obj)
    lib.save()
move_cmd = ui.Subcommand('move',
help='move or copy items', aliases=('mv',))
move_cmd.parser.add_option('-d', '--dest', metavar='DIR', dest='dest',
help='destination directory')
move_cmd.parser.add_option('-c', '--copy', default=False, action='store_true',
help='copy instead of moving')
move_cmd.parser.add_option('-a', '--album', default=False, action='store_true',
help='match whole albums instead of tracks')
def move_func(lib, config, opts, args):
    """Entry point for the `move` subcommand. When a destination is
    given it is normalized and must be an existing directory, or a
    UserError is raised.
    """
    target = opts.dest
    if target is not None:
        target = normpath(target)
        if not os.path.isdir(target):
            raise ui.UserError('no such directory: %s' % target)

    move_items(lib, target, decargs(args), opts.copy, opts.album)
move_cmd.func = move_func
default_commands.append(move_cmd)
+1036
View File
File diff suppressed because it is too large Load Diff
+73 -9
View File
@@ -16,6 +16,8 @@
import os
import sys
import re
import shutil
from collections import defaultdict
MAX_FILENAME_LENGTH = 200
@@ -82,11 +84,12 @@ def mkdirall(path):
if not os.path.isdir(syspath(ancestor)):
os.mkdir(syspath(ancestor))
def prune_dirs(path, root):
def prune_dirs(path, root, clutter=('.DS_Store', 'Thumbs.db')):
"""If path is an empty directory, then remove it. Recursively
remove path's ancestry up to root (which is never removed) where
there are empty directories. If path is not contained in root, then
nothing is removed.
nothing is removed. Filenames in clutter are ignored when
determining emptiness.
"""
path = normpath(path)
root = normpath(root)
@@ -100,9 +103,18 @@ def prune_dirs(path, root):
ancestors.append(path)
ancestors.reverse()
for directory in ancestors:
try:
os.rmdir(syspath(directory))
except OSError:
directory = syspath(directory)
if not os.path.exists(directory):
# Directory gone already.
continue
if all(fn in clutter for fn in os.listdir(directory)):
# Directory contains only clutter (or nothing).
try:
shutil.rmtree(directory)
except OSError:
break
else:
break
def components(path, pathmod=None):
@@ -162,26 +174,59 @@ def syspath(path, pathmod=None):
path = path.decode('utf8', 'replace')
# Add the magic prefix if it isn't already there
# Not sure what the magic prefix he was adding actually does but if it's a network path
# it breaks when we add the prefix - ignore the addition if the \\ is already there
if not path.startswith(u'\\\\?\\') and not path.startswith(u'\\'):
if not path.startswith(u'\\\\?\\'):
path = u'\\\\?\\' + path
return path
def samefile(p1, p2):
    """Safer equality for paths: both are converted with syspath
    before being compared.
    """
    first = syspath(p1)
    second = syspath(p2)
    return shutil._samefile(first, second)
def soft_remove(path):
    """Remove the file if it exists; do nothing otherwise."""
    target = syspath(path)
    if not os.path.exists(target):
        return
    os.remove(target)
def _assert_not_exists(path, pathmod=None):
"""Raises an OSError if the path exists."""
pathmod = pathmod or os.path
if pathmod.exists(path):
raise OSError('file exists: %s' % path)
def copy(path, dest, replace=False, pathmod=None):
    """Copy a plain file. Permissions are not copied. If dest already
    exists, raises an OSError unless replace is True. Has no effect if
    path is the same as dest. Paths are translated to system paths
    before the syscall.

    `pathmod` is the path module used for the existence check on
    dest; it defaults to os.path.
    """
    if samefile(path, dest):
        return
    path = syspath(path)
    dest = syspath(dest)
    # Only refuse to overwrite when the caller did not ask for
    # replacement; previously `replace` was ignored and an existing
    # dest always raised.
    if not replace:
        _assert_not_exists(dest, pathmod)
    return shutil.copyfile(path, dest)
def move(path, dest, replace=False, pathmod=None):
    """Rename a file. dest may not be a directory. If dest already
    exists, raises an OSError unless replace is True. Has no effect if
    path is the same as dest. Paths are translated to system paths.

    `pathmod` is the path module used for the existence check on
    dest; it defaults to os.path.
    """
    if samefile(path, dest):
        return
    path = syspath(path)
    dest = syspath(dest)
    # Only refuse to overwrite when the caller did not ask for
    # replacement; previously `replace` was ignored and an existing
    # dest always raised.
    if not replace:
        _assert_not_exists(dest, pathmod)
    return shutil.move(path, dest)
# Note: POSIX actually supports \ and : -- I just think they're
# a pain. And ? has caused problems for some.
CHAR_REPLACE = [
(re.compile(r'[\\/\?]|^\.'), '_'),
(re.compile(r':'), '-'),
]
CHAR_REPLACE_WINDOWS = re.compile('["\*<>\|]|^\.|\.$| +$'), '_'
CHAR_REPLACE_WINDOWS = re.compile(r'["\*<>\|]|^\.|\.$| +$'), '_'
def sanitize_path(path, pathmod=None):
"""Takes a path and makes sure that it is legal. Returns a new path.
Only works with fragments; won't work reliably on Windows when a
@@ -255,3 +300,22 @@ def levenshtein(s1, s2):
previous_row = current_row
return previous_row[-1]
def plurality(objs):
    """Return a `(obj, count)` pair for the object that occurs most
    often in the sequence `objs`, together with its number of
    occurrences. An empty sequence yields `(None, 0)`.
    """
    # Tally the occurrences of every object.
    counts = defaultdict(int)
    for obj in objs:
        counts[obj] += 1

    if not counts:
        return None, 0

    # max() keeps the first maximal key it encounters.
    winner = max(counts, key=counts.get)
    return winner, counts[winner]
+48
View File
@@ -0,0 +1,48 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""A simple utility for constructing filesystem-like trees from beets
libraries.
"""
from collections import namedtuple
from beets import util
Node = namedtuple('Node', ['files', 'dirs'])
def _insert(node, path, itemid):
"""Insert an item into a virtual filesystem node."""
if len(path) == 1:
# Last component. Insert file.
node.files[path[0]] = itemid
else:
# In a directory.
dirname = path[0]
rest = path[1:]
if dirname not in node.dirs:
node.dirs[dirname] = Node({}, {})
_insert(node.dirs[dirname], rest, itemid)
def libtree(lib):
    """Build a filesystem-like directory tree for the files contained
    in `lib`. Every node is a (files, dirs) named tuple of two
    dictionaries: `files` maps filenames to Item ids and `dirs` maps
    directory names to child nodes. Returns the root node.
    """
    tree = Node({}, {})
    for entry in lib.items():
        # Place each item at its destination path fragment, split
        # into components.
        fragment = lib.destination(entry, fragment=True)
        _insert(tree, util.components(fragment), entry.id)
    return tree