diff --git a/lib/beets/__init__.py b/lib/beets/__init__.py index c7ef23b6..3ef490cf 100644 --- a/lib/beets/__init__.py +++ b/lib/beets/__init__.py @@ -8,7 +8,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. @@ -16,7 +16,7 @@ # MODIFIED TO WORK WITH HEADPHONES!! # -__version__ = '1.0b14' +__version__ = '1.0b15' __author__ = 'Adrian Sampson ' from lib.beets.library import Library diff --git a/lib/beets/autotag/__init__.py b/lib/beets/autotag/__init__.py index 2ea52e03..e4e4d1a0 100644 --- a/lib/beets/autotag/__init__.py +++ b/lib/beets/autotag/__init__.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2011, Adrian Sampson. +# Copyright 2012, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -8,7 +8,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. @@ -22,7 +22,7 @@ from lib.beets import library, mediafile from lib.beets.util import sorted_walk, ancestry # Parts of external interface. -from .hooks import AlbumInfo, TrackInfo +from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch from .match import AutotagError from .match import tag_item, tag_album from .match import RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE @@ -93,7 +93,7 @@ def albums_in_dir(path, ignore=()): collapse_root = root collapse_items = [] continue - + # If it's nonempty, yield it. 
if items: yield root, items @@ -106,6 +106,8 @@ def apply_item_metadata(item, track_info): """Set an item's metadata from its matched TrackInfo object. """ item.artist = track_info.artist + item.artist_sort = track_info.artist_sort + item.artist_credit = track_info.artist_credit item.title = track_info.title item.mb_trackid = track_info.track_id if track_info.artist_id: @@ -113,11 +115,12 @@ def apply_item_metadata(item, track_info): # At the moment, the other metadata is left intact (including album # and track number). Perhaps these should be emptied? -def apply_metadata(items, album_info): - """Set the items' metadata to match an AlbumInfo object. The list - of items must be ordered. +def apply_metadata(album_info, mapping, per_disc_numbering=False): + """Set the items' metadata to match an AlbumInfo object using a + mapping from Items to TrackInfo objects. If `per_disc_numbering`, + then the track numbers are per-disc instead of per-release. """ - for index, (item, track_info) in enumerate(zip(items, album_info.tracks)): + for item, track_info in mapping.iteritems(): # Album, artist, track count. if not item: continue @@ -127,8 +130,15 @@ def apply_metadata(items, album_info): item.artist = album_info.artist item.albumartist = album_info.artist item.album = album_info.album - item.tracktotal = len(items) - + item.tracktotal = len(album_info.tracks) + + # Artist sort and credit names. + item.artist_sort = track_info.artist_sort or album_info.artist_sort + item.artist_credit = track_info.artist_credit or \ + album_info.artist_credit + item.albumartist_sort = album_info.artist_sort + item.albumartist_credit = album_info.artist_credit + # Release date. if album_info.year: item.year = album_info.year @@ -136,15 +146,19 @@ def apply_metadata(items, album_info): item.month = album_info.month if album_info.day: item.day = album_info.day - - # Title and track index. + + # Title. 
item.title = track_info.title - item.track = index + 1 + + if per_disc_numbering: + item.track = track_info.medium_index + else: + item.track = track_info.index # Disc and disc count. item.disc = track_info.medium item.disctotal = album_info.mediums - + # MusicBrainz IDs. item.mb_trackid = track_info.track_id item.mb_albumid = album_info.album_id @@ -153,12 +167,25 @@ def apply_metadata(items, album_info): else: item.mb_artistid = album_info.artist_id item.mb_albumartistid = album_info.artist_id + item.mb_releasegroupid = album_info.releasegroup_id + + # Compilation flag. + item.comp = album_info.va + + # Miscellaneous metadata. item.albumtype = album_info.albumtype if album_info.label: item.label = album_info.label - # Compilation flag. - item.comp = album_info.va + item.asin = album_info.asin + item.catalognum = album_info.catalognum + item.script = album_info.script + item.language = album_info.language + item.country = album_info.country + item.albumstatus = album_info.albumstatus + item.media = album_info.media + item.albumdisambig = album_info.albumdisambig + item.disctitle = track_info.disctitle # Headphones seal of approval item.comments = 'tagged by headphones/beets' diff --git a/lib/beets/autotag/hooks.py b/lib/beets/autotag/hooks.py index b4fa9826..d0042ce3 100644 --- a/lib/beets/autotag/hooks.py +++ b/lib/beets/autotag/hooks.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2011, Adrian Sampson. +# Copyright 2012, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -8,15 +8,20 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. 
"""Glue between metadata sources and the matching logic.""" +import logging +from collections import namedtuple from lib.beets import plugins from lib.beets.autotag import mb +log = logging.getLogger('beets') + + # Classes used to represent candidate options. class AlbumInfo(object): @@ -36,13 +41,26 @@ class AlbumInfo(object): - ``day``: release day - ``label``: music label responsible for the release - ``mediums``: the number of discs in this release + - ``artist_sort``: name of the release's artist for sorting + - ``releasegroup_id``: MBID for the album's release group + - ``catalognum``: the label's catalog number for the release + - ``script``: character set used for metadata + - ``language``: human language of the metadata + - ``country``: the release country + - ``albumstatus``: MusicBrainz release status (Official, etc.) + - ``media``: delivery mechanism (Vinyl, etc.) + - ``albumdisambig``: MusicBrainz release disambiguation comment + - ``artist_credit``: Release-specific artist name The fields up through ``tracks`` are required. The others are optional and may be None. 
""" def __init__(self, album, album_id, artist, artist_id, tracks, asin=None, albumtype=None, va=False, year=None, month=None, day=None, - label=None, mediums=None): + label=None, mediums=None, artist_sort=None, + releasegroup_id=None, catalognum=None, script=None, + language=None, country=None, albumstatus=None, media=None, + albumdisambig=None, artist_credit=None): self.album = album self.album_id = album_id self.artist = artist @@ -56,6 +74,16 @@ class AlbumInfo(object): self.day = day self.label = label self.mediums = mediums + self.artist_sort = artist_sort + self.releasegroup_id = releasegroup_id + self.catalognum = catalognum + self.script = script + self.language = language + self.country = country + self.albumstatus = albumstatus + self.media = media + self.albumdisambig = albumdisambig + self.artist_credit = artist_credit class TrackInfo(object): """Describes a canonical track present on a release. Appears as part @@ -66,32 +94,53 @@ class TrackInfo(object): - ``artist``: individual track artist name - ``artist_id`` - ``length``: float: duration of the track in seconds + - ``index``: position on the entire release - ``medium``: the disc number this track appears on in the album - ``medium_index``: the track's position on the disc + - ``artist_sort``: name of the track artist for sorting + - ``disctitle``: name of the individual medium (subtitle) + - ``artist_credit``: Recording-specific artist name Only ``title`` and ``track_id`` are required. The rest of the fields - may be None. + may be None. The indices ``index``, ``medium``, and ``medium_index`` + are all 1-based. 
""" def __init__(self, title, track_id, artist=None, artist_id=None, - length=None, medium=None, medium_index=None): + length=None, index=None, medium=None, medium_index=None, + artist_sort=None, disctitle=None, artist_credit=None): self.title = title self.track_id = track_id self.artist = artist self.artist_id = artist_id self.length = length + self.index = index self.medium = medium self.medium_index = medium_index + self.artist_sort = artist_sort + self.disctitle = disctitle + self.artist_credit = artist_credit + +AlbumMatch = namedtuple('AlbumMatch', ['distance', 'info', 'mapping', + 'extra_items', 'extra_tracks']) + +TrackMatch = namedtuple('TrackMatch', ['distance', 'info']) # Aggregation of sources. def _album_for_id(album_id): """Get an album corresponding to a MusicBrainz release ID.""" - return mb.album_for_id(album_id) + try: + return mb.album_for_id(album_id) + except mb.MusicBrainzAPIError as exc: + exc.log(log) def _track_for_id(track_id): """Get an item for a recording MBID.""" - return mb.track_for_id(track_id) + try: + return mb.track_for_id(track_id) + except mb.MusicBrainzAPIError as exc: + exc.log(log) def _album_candidates(items, artist, album, va_likely): """Search for album matches. ``items`` is a list of Item objects @@ -104,11 +153,17 @@ def _album_candidates(items, artist, album, va_likely): # Base candidates if we have album and artist to match. if artist and album: - out.extend(mb.match_album(artist, album, len(items))) + try: + out.extend(mb.match_album(artist, album, len(items))) + except mb.MusicBrainzAPIError as exc: + exc.log(log) # Also add VA matches from MusicBrainz where appropriate. if va_likely and album: - out.extend(mb.match_album(None, album, len(items))) + try: + out.extend(mb.match_album(None, album, len(items))) + except mb.MusicBrainzAPIError as exc: + exc.log(log) # Candidates from plugins. out.extend(plugins.candidates(items)) @@ -124,7 +179,10 @@ def _item_candidates(item, artist, title): # MusicBrainz candidates. 
if artist and title: - out.extend(mb.match_track(artist, title)) + try: + out.extend(mb.match_track(artist, title)) + except mb.MusicBrainzAPIError as exc: + exc.log(log) # Plugin candidates. out.extend(plugins.item_candidates(item)) diff --git a/lib/beets/autotag/match.py b/lib/beets/autotag/match.py index ac4d6cd0..1b42da49 100644 --- a/lib/beets/autotag/match.py +++ b/lib/beets/autotag/match.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2011, Adrian Sampson. +# Copyright 2012, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -8,13 +8,15 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. """Matches existing metadata with canonical information to identify releases and tracks. """ +from __future__ import division + import logging import re from lib.munkres import Munkres @@ -33,6 +35,8 @@ ALBUM_WEIGHT = 3.0 TRACK_WEIGHT = 1.0 # The weight of a missing track. MISSING_WEIGHT = 0.9 +# The weight of an extra (umatched) track. +UNMATCHED_WEIGHT = 0.6 # These distances are components of the track distance (that is, they # compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT; # the overall TRACK_WEIGHT does that). @@ -112,7 +116,7 @@ def string_dist(str1, str2): """ str1 = str1.lower() str2 = str2.lower() - + # Don't penalize strings that move certain words to the end. For # example, "the something" should be considered equal to # "something, the". @@ -126,7 +130,7 @@ def string_dist(str1, str2): for pat, repl in SD_REPLACE: str1 = re.sub(pat, repl, str1) str2 = re.sub(pat, repl, str2) - + # Change the weight for certain string portions matched by a set # of regular expressions. 
We gradually change the strings and build # up penalties associated with parts of the string that were @@ -137,7 +141,7 @@ def string_dist(str1, str2): # Get strings that drop the pattern. case_str1 = re.sub(pat, '', str1) case_str2 = re.sub(pat, '', str2) - + if case_str1 != str1 or case_str2 != str2: # If the pattern was present (i.e., it is deleted in the # the current case), recalculate the distances for the @@ -146,7 +150,7 @@ def string_dist(str1, str2): case_delta = max(0.0, base_dist - case_dist) if case_delta == 0.0: continue - + # Shift our baseline strings down (to avoid rematching the # same part of the string) and add a scaled distance # amount to the penalties. @@ -155,7 +159,7 @@ def string_dist(str1, str2): base_dist = case_dist penalty += weight * case_delta dist = base_dist + penalty - + return dist def current_metadata(items): @@ -171,42 +175,33 @@ def current_metadata(items): consensus[key] = (freq == len(values)) return likelies['artist'], likelies['album'], consensus['artist'] -def order_items(items, trackinfo): - """Orders the items based on how they match some canonical track - information. Returns a list of Items whose length is equal to the - length of ``trackinfo``. This always produces a result if the - numbers of items is at most the number of TrackInfo objects - (otherwise, returns None). In the case of a partial match, the - returned list may contain None in some positions. +def assign_items(items, tracks): + """Given a list of Items and a list of TrackInfo objects, find the + best mapping between them. Returns a mapping from Items to TrackInfo + objects, a set of extra Items, and a set of extra TrackInfo + objects. These "extra" objects occur when there is an unequal number + of objects of the two types. """ - # Make sure lengths match: If there is less items, it might just be that - # there is some tracks missing. - if len(items) > len(trackinfo): - return None - # Construct the cost matrix. 
costs = [] - for cur_item in items: + for item in items: row = [] - for i, canon_item in enumerate(trackinfo): - row.append(track_distance(cur_item, canon_item, i+1)) + for i, track in enumerate(tracks): + row.append(track_distance(item, track)) costs.append(row) - + # Find a minimum-cost bipartite matching. matching = Munkres().compute(costs) - # Order items based on the matching. - ordered_items = [None]*len(trackinfo) - for cur_idx, canon_idx in matching: - ordered_items[canon_idx] = items[cur_idx] - return ordered_items + # Produce the output matching. + mapping = dict((items[i], tracks[j]) for (i, j) in matching) + extra_items = set(items) - set(mapping.keys()) + extra_tracks = set(tracks) - set(mapping.values()) + return mapping, extra_items, extra_tracks -def track_distance(item, track_info, track_index=None, incl_artist=False): - """Determines the significance of a track metadata change. Returns - a float in [0.0,1.0]. `track_index` is the track number of the - `track_info` metadata set. If `track_index` is provided and - item.track is set, then these indices are used as a component of - the distance calculation. `incl_artist` indicates that a distance +def track_distance(item, track_info, incl_artist=False): + """Determines the significance of a track metadata change. Returns a + float in [0.0,1.0]. `incl_artist` indicates that a distance component should be included for the track artist (i.e., for various-artist releases). """ @@ -221,7 +216,7 @@ def track_distance(item, track_info, track_index=None, incl_artist=False): diff = min(diff, TRACK_LENGTH_MAX) dist += (diff / TRACK_LENGTH_MAX) * TRACK_LENGTH_WEIGHT dist_max += TRACK_LENGTH_WEIGHT - + # Track title. dist += string_dist(item.title, track_info.title) * TRACK_TITLE_WEIGHT dist_max += TRACK_TITLE_WEIGHT @@ -237,11 +232,11 @@ def track_distance(item, track_info, track_index=None, incl_artist=False): dist_max += TRACK_ARTIST_WEIGHT # Track index. 
- if track_index and item.track: - if item.track not in (track_index, track_info.medium_index): + if track_info.index and item.track: + if item.track not in (track_info.index, track_info.medium_index): dist += TRACK_INDEX_WEIGHT dist_max += TRACK_INDEX_WEIGHT - + # MusicBrainz track ID. if item.mb_trackid: if item.mb_trackid != track_info.track_id: @@ -255,35 +250,43 @@ def track_distance(item, track_info, track_index=None, incl_artist=False): return dist / dist_max -def distance(items, album_info): +def distance(items, album_info, mapping): """Determines how "significant" an album metadata change would be. - Returns a float in [0.0,1.0]. The list of items must be ordered. + Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object + reflecting the album to be compared. `items` is a sequence of all + Item objects that will be matched (order is not important). + `mapping` is a dictionary mapping Items to TrackInfo objects; the + keys are a subset of `items` and the values are a subset of + `album_info.tracks`. """ cur_artist, cur_album, _ = current_metadata(items) cur_artist = cur_artist or '' cur_album = cur_album or '' - + # These accumulate the possible distance components. The final # distance will be dist/dist_max. dist = 0.0 dist_max = 0.0 - + # Artist/album metadata. if not album_info.va: dist += string_dist(cur_artist, album_info.artist) * ARTIST_WEIGHT dist_max += ARTIST_WEIGHT dist += string_dist(cur_album, album_info.album) * ALBUM_WEIGHT dist_max += ALBUM_WEIGHT - - # Track distances. - for i, (item, track_info) in enumerate(zip(items, album_info.tracks)): - if item: - dist += track_distance(item, track_info, i+1, album_info.va) * \ - TRACK_WEIGHT - dist_max += TRACK_WEIGHT - else: - dist += MISSING_WEIGHT - dist_max += MISSING_WEIGHT + + # Matched track distances. + for item, track in mapping.iteritems(): + dist += track_distance(item, track, album_info.va) * TRACK_WEIGHT + dist_max += TRACK_WEIGHT + + # Extra and unmatched tracks. 
+ for track in set(album_info.tracks) - set(mapping.values()): + dist += MISSING_WEIGHT + dist_max += MISSING_WEIGHT + for item in set(items) - set(mapping.keys()): + dist += UNMATCHED_WEIGHT + dist_max += UNMATCHED_WEIGHT # Plugin distances. plugin_d, plugin_dm = plugins.album_distance(items, album_info) @@ -294,18 +297,19 @@ def distance(items, album_info): if dist_max == 0.0: return 0.0 else: - return dist/dist_max + return dist / dist_max def match_by_id(items): """If the items are tagged with a MusicBrainz album ID, returns an - info dict for the corresponding album. Otherwise, returns None. + AlbumInfo object for the corresponding album. Otherwise, returns + None. """ # Is there a consensus on the MB album ID? albumids = [item.mb_albumid for item in items if item.mb_albumid] if not albumids: log.debug('No album IDs found.') return None - + # If all album IDs are equal, look up the album. if bool(reduce(lambda x,y: x if x==y else (), albumids)): albumid = albumids[0] @@ -314,21 +318,21 @@ def match_by_id(items): else: log.debug('No album ID consensus.') return None - + #fixme In the future, at the expense of performance, we could use # other IDs (i.e., track and artist) in case the album tag isn't # present, but that event seems very unlikely. def recommendation(results): - """Given a sorted list of result tuples, returns a recommendation - flag (RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE) based - on the results' distances. + """Given a sorted list of AlbumMatch or TrackMatch objects, return a + recommendation flag (RECOMMEND_STRONG, RECOMMEND_MEDIUM, + RECOMMEND_NONE) based on the results' distances. """ if not results: # No candidates: no recommendation. rec = RECOMMEND_NONE else: - min_dist = results[0][0] + min_dist = results[0].distance if min_dist < STRONG_REC_THRESH: # Strong recommendation level. rec = RECOMMEND_STRONG @@ -338,7 +342,7 @@ def recommendation(results): elif min_dist <= MEDIUM_REC_THRESH: # Medium recommendation level. 
rec = RECOMMEND_MEDIUM - elif results[1][0] - min_dist >= REC_GAP_THRESH: + elif results[1].distance - min_dist >= REC_GAP_THRESH: # Gap between first two candidates is large. rec = RECOMMEND_MEDIUM else: @@ -346,36 +350,28 @@ def recommendation(results): rec = RECOMMEND_NONE return rec -def validate_candidate(items, tuple_dict, info): +def _add_candidate(items, results, info): """Given a candidate AlbumInfo object, attempt to add the candidate - to the output dictionary of result tuples. This involves checking - the track count, ordering the items, checking for duplicates, and - calculating the distance. + to the output dictionary of AlbumMatch objects. This involves + checking the track count, ordering the items, checking for + duplicates, and calculating the distance. """ log.debug('Candidate: %s - %s' % (info.artist, info.album)) # Don't duplicate. - if info.album_id in tuple_dict: + if info.album_id in results: log.debug('Duplicate.') return - # Make sure the album has the correct number of tracks. - if len(items) > len(info.tracks): - log.debug('Too many items to match: %i > %i.' % - (len(items), len(info.tracks))) - return - - # Put items in order. - ordered = order_items(items, info.tracks) - if not ordered: - log.debug('Not orderable.') - return + # Find mapping between the items and the track info. + mapping, extra_items, extra_tracks = assign_items(items, info.tracks) # Get the change distance. - dist = distance(ordered, info) + dist = distance(items, info, mapping) log.debug('Success. Distance: %f' % dist) - tuple_dict[info.album_id] = dist, ordered, info + results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, + extra_items, extra_tracks) def tag_album(items, timid=False, search_artist=None, search_album=None, search_id=None): @@ -383,10 +379,8 @@ def tag_album(items, timid=False, search_artist=None, search_album=None, set of items comprised by an album. Returns everything relevant: - The current artist. - The current album. 
- - A list of (distance, items, info) tuples where info is a - dictionary containing the inferred tags and items is a - reordered version of the input items list. The candidates are - sorted by distance (i.e., best match first). + - A list of AlbumMatch objects. The candidates are sorted by + distance (i.e., best match first). - A recommendation, one of RECOMMEND_STRONG, RECOMMEND_MEDIUM, or RECOMMEND_NONE; indicating that the first candidate is very likely, it is somewhat likely, or no conclusion could @@ -398,11 +392,11 @@ def tag_album(items, timid=False, search_artist=None, search_album=None, # Get current metadata. cur_artist, cur_album, artist_consensus = current_metadata(items) log.debug('Tagging %s - %s' % (cur_artist, cur_album)) - + # The output result (distance, AlbumInfo) tuples (keyed by MB album # ID). candidates = {} - + # Try to find album indicated by MusicBrainz IDs. if search_id: log.debug('Searching for album ID: ' + search_id) @@ -410,7 +404,7 @@ def tag_album(items, timid=False, search_artist=None, search_album=None, else: id_info = match_by_id(items) if id_info: - validate_candidate(items, candidates, id_info) + _add_candidate(items, candidates, id_info) rec = recommendation(candidates.values()) log.debug('Album ID match recommendation is ' + str(rec)) if candidates and not timid: @@ -427,13 +421,13 @@ def tag_album(items, timid=False, search_artist=None, search_album=None, return cur_artist, cur_album, candidates.values(), rec else: return cur_artist, cur_album, [], RECOMMEND_NONE - + # Search terms. if not (search_artist and search_album): # No explicit search terms -- use current metadata. search_artist, search_album = cur_artist, cur_album log.debug(u'Search terms: %s - %s' % (search_artist, search_album)) - + # Is this album likely to be a "various artist" release? 
va_likely = ((not artist_consensus) or (search_artist.lower() in VA_ARTISTS) or @@ -445,8 +439,8 @@ def tag_album(items, timid=False, search_artist=None, search_album=None, va_likely) log.debug(u'Evaluating %i candidates.' % len(search_cands)) for info in search_cands: - validate_candidate(items, candidates, info) - + _add_candidate(items, candidates, info) + # Sort and get the recommendation. candidates = sorted(candidates.itervalues()) rec = recommendation(candidates) @@ -455,10 +449,10 @@ def tag_album(items, timid=False, search_artist=None, search_album=None, def tag_item(item, timid=False, search_artist=None, search_title=None, search_id=None): """Attempts to find metadata for a single track. Returns a - `(candidates, recommendation)` pair where `candidates` is a list - of `(distance, track_info)` pairs. `search_artist` and - `search_title` may be used to override the current metadata for - the purposes of the MusicBrainz title; likewise `search_id`. + `(candidates, recommendation)` pair where `candidates` is a list of + TrackMatch objects. `search_artist` and `search_title` may be used + to override the current metadata for the purposes of the MusicBrainz + title; likewise `search_id`. """ # Holds candidates found so far: keys are MBIDs; values are # (distance, TrackInfo) pairs. @@ -471,7 +465,8 @@ def tag_item(item, timid=False, search_artist=None, search_title=None, track_info = hooks._track_for_id(trackid) if track_info: dist = track_distance(item, track_info, incl_artist=True) - candidates[track_info.track_id] = (dist, track_info) + candidates[track_info.track_id] = \ + hooks.TrackMatch(dist, track_info) # If this is a good match, then don't keep searching. rec = recommendation(candidates.values()) if rec == RECOMMEND_STRONG and not timid: @@ -484,7 +479,7 @@ def tag_item(item, timid=False, search_artist=None, search_title=None, return candidates.values(), rec else: return [], RECOMMEND_NONE - + # Search terms. 
if not (search_artist and search_title): search_artist, search_title = item.artist, item.title @@ -493,7 +488,7 @@ def tag_item(item, timid=False, search_artist=None, search_title=None, # Get and evaluate candidate metadata. for track_info in hooks._item_candidates(item, search_artist, search_title): dist = track_distance(item, track_info, incl_artist=True) - candidates[track_info.track_id] = (dist, track_info) + candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info) # Sort by distance and return with recommendation. log.debug('Found %i candidates.' % len(candidates)) diff --git a/lib/beets/autotag/mb.py b/lib/beets/autotag/mb.py index 6d286f57..b5ad589f 100644 --- a/lib/beets/autotag/mb.py +++ b/lib/beets/autotag/mb.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2011, Adrian Sampson. +# Copyright 2012, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -8,7 +8,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. @@ -16,9 +16,11 @@ """ import logging import lib.musicbrainzngs as musicbrainzngs +import traceback import lib.beets.autotag.hooks import lib.beets +from lib.beets import util SEARCH_LIMIT = 5 VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' @@ -26,8 +28,18 @@ VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' musicbrainzngs.set_useragent('beets', lib.beets.__version__, 'http://beets.radbox.org/') -class ServerBusyError(Exception): pass -class BadResponseError(Exception): pass +class MusicBrainzAPIError(util.HumanReadableException): + """An error while talking to MusicBrainz. The `query` field is the + parameter to the action and may have any type. 
+ """ + def __init__(self, reason, verb, query, tb=None): + self.query = query + super(MusicBrainzAPIError, self).__init__(reason, verb, tb) + + def get_message(self): + return u'"{0}" in {1} with query {2}'.format( + self._reasonstr(), self.verb, repr(self.query) + ) log = logging.getLogger('beets') @@ -45,22 +57,64 @@ else: _mb_release_search = musicbrainzngs.search_releases _mb_recording_search = musicbrainzngs.search_recordings -def track_info(recording, medium=None, medium_index=None): +def _flatten_artist_credit(credit): + """Given a list representing an ``artist-credit`` block, flatten the + data into a triple of joined artist name strings: canonical, sort, and + credit. + """ + artist_parts = [] + artist_sort_parts = [] + artist_credit_parts = [] + for el in credit: + if isinstance(el, basestring): + # Join phrase. + artist_parts.append(el) + artist_credit_parts.append(el) + artist_sort_parts.append(el) + + else: + # An artist. + cur_artist_name = el['artist']['name'] + artist_parts.append(cur_artist_name) + + # Artist sort name. + if 'sort-name' in el['artist']: + artist_sort_parts.append(el['artist']['sort-name']) + else: + artist_sort_parts.append(cur_artist_name) + + # Artist credit. + if 'name' in el: + artist_credit_parts.append(el['name']) + else: + artist_credit_parts.append(cur_artist_name) + + return ( + ''.join(artist_parts), + ''.join(artist_sort_parts), + ''.join(artist_credit_parts), + ) + +def track_info(recording, index=None, medium=None, medium_index=None): """Translates a MusicBrainz recording result dictionary into a beets - ``TrackInfo`` object. ``medium_index``, if provided, is the track's - index (1-based) on its medium. + ``TrackInfo`` object. Three parameters are optional and are used + only for tracks that appear on releases (non-singletons): ``index``, + the overall track number; ``medium``, the disc number; + ``medium_index``, the track's index on its medium. Each number is a + 1-based index. 
""" info = lib.beets.autotag.hooks.TrackInfo(recording['title'], recording['id'], + index=index, medium=medium, medium_index=medium_index) - # Get the name of the track artist. - if recording.get('artist-credit-phrase'): - info.artist = recording['artist-credit-phrase'] + if recording.get('artist-credit'): + # Get the artist names. + info.artist, info.artist_sort, info.artist_credit = \ + _flatten_artist_credit(recording['artist-credit']) - # Get the ID of the first artist. - if 'artist-credit' in recording: + # Get the ID and sort name of the first artist. artist = recording['artist-credit'][0]['artist'] info.artist_id = artist['id'] @@ -84,25 +138,25 @@ def album_info(release): AlbumInfo object containing the interesting data about that release. """ # Get artist name using join phrases. - artist_parts = [] - for el in release['artist-credit']: - if isinstance(el, basestring): - artist_parts.append(el) - else: - artist_parts.append(el['artist']['name']) - artist_name = ''.join(artist_parts) + artist_name, artist_sort_name, artist_credit_name = \ + _flatten_artist_credit(release['artist-credit']) # Basic info. track_infos = [] + index = 0 for medium in release['medium-list']: + disctitle = medium.get('title') for track in medium['track-list']: + index += 1 ti = track_info(track['recording'], + index, int(medium['position']), int(track['position'])) if track.get('title'): # Track title may be distinct from underling recording # title. 
ti.title = track['title'] + ti.disctitle = disctitle track_infos.append(ti) info = lib.beets.autotag.hooks.AlbumInfo( release['title'], @@ -111,10 +165,15 @@ def album_info(release): release['artist-credit'][0]['artist']['id'], track_infos, mediums=len(release['medium-list']), + artist_sort=artist_sort_name, + artist_credit=artist_credit_name, ) info.va = info.artist_id == VARIOUS_ARTISTS_ID - if 'asin' in release: - info.asin = release['asin'] + info.asin = release.get('asin') + info.releasegroup_id = release['release-group']['id'] + info.albumdisambig = release['release-group'].get('disambiguation') + info.country = release.get('country') + info.albumstatus = release.get('status') # Release type not always populated. if 'type' in release['release-group']: @@ -137,12 +196,25 @@ def album_info(release): label = label_info['label']['name'] if label != '[no label]': info.label = label + info.catalognum = label_info.get('catalog-number') + + # Text representation data. + if release.get('text-representation'): + rep = release['text-representation'] + info.script = rep.get('script') + info.language = rep.get('language') + + # Media (format). + if release['medium-list']: + first_medium = release['medium-list'][0] + info.media = first_medium.get('format') return info def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): """Searches for a single album ("release" in MusicBrainz parlance) - and returns an iterator over AlbumInfo objects. + and returns an iterator over AlbumInfo objects. May raise a + MusicBrainzAPIError. The query consists of an artist name, an album name, and, optionally, a number of tracks on the album. 
@@ -161,7 +233,11 @@ def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): if not any(criteria.itervalues()): return - res = _mb_release_search(limit=limit, **criteria) + try: + res = _mb_release_search(limit=limit, **criteria) + except musicbrainzngs.MusicBrainzError as exc: + raise MusicBrainzAPIError(exc, 'release search', criteria, + traceback.format_exc()) for release in res['release-list']: # The search result is missing some data (namely, the tracks), # so we just use the ID and fetch the rest of the information. @@ -171,7 +247,7 @@ def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): def match_track(artist, title, limit=SEARCH_LIMIT): """Searches for a single track and returns an iterable of TrackInfo - objects. + objects. May raise a MusicBrainzAPIError. """ criteria = { 'artist': artist.lower(), @@ -181,28 +257,39 @@ def match_track(artist, title, limit=SEARCH_LIMIT): if not any(criteria.itervalues()): return - res = _mb_recording_search(limit=limit, **criteria) + try: + res = _mb_recording_search(limit=limit, **criteria) + except musicbrainzngs.MusicBrainzError as exc: + raise MusicBrainzAPIError(exc, 'recording search', criteria, + traceback.format_exc()) for recording in res['recording-list']: yield track_info(recording) def album_for_id(albumid): """Fetches an album by its MusicBrainz ID and returns an AlbumInfo - object or None if the album is not found. + object or None if the album is not found. May raise a + MusicBrainzAPIError. """ try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) except musicbrainzngs.ResponseError: log.debug('Album ID match failed.') return None + except musicbrainzngs.MusicBrainzError as exc: + raise MusicBrainzAPIError(exc, 'get release by ID', albumid, + traceback.format_exc()) return album_info(res['release']) def track_for_id(trackid): """Fetches a track by its MusicBrainz ID. Returns a TrackInfo object - or None if no track is found. + or None if no track is found. 
May raise a MusicBrainzAPIError. """ try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) except musicbrainzngs.ResponseError: log.debug('Track ID match failed.') return None + except musicbrainzngs.MusicBrainzError as exc: + raise MusicBrainzAPIError(exc, 'get recording by ID', trackid, + traceback.format_exc()) return track_info(res['recording']) diff --git a/lib/beets/importer.py b/lib/beets/importer.py index 1e7affd8..048077b8 100644 --- a/lib/beets/importer.py +++ b/lib/beets/importer.py @@ -8,14 +8,15 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. """Provides the basic, interface-agnostic workflow for importing and autotagging music files. """ -from __future__ import with_statement # Python 2.5 +from __future__ import print_function + import os import logging import pickle @@ -23,7 +24,6 @@ from collections import defaultdict from lib.beets import autotag from lib.beets import library -import lib.beets.autotag.art from lib.beets import plugins from lib.beets import util from lib.beets.util import pipeline @@ -56,7 +56,7 @@ def tag_log(logfile, status, path): reflect the reason the album couldn't be tagged. """ if logfile: - print >>logfile, '%s %s' % (status, path) + print('{0} {1}'.format(status, path), file=logfile) logfile.flush() def log_choice(config, task, duplicate=False): @@ -80,23 +80,6 @@ def log_choice(config, task, duplicate=False): elif task.choice_flag is action.SKIP: tag_log(config.logfile, 'skip', path) -def _reopen_lib(lib): - """Because of limitations in SQLite, a given Library is bound to - the thread in which it was created. This function reopens Library - objects so that they can be used from separate threads. 
- """ - if isinstance(lib, library.Library): - return library.Library( - lib.path, - lib.directory, - lib.path_formats, - lib.art_filename, - lib.timeout, - lib.replacements, - ) - else: - return lib - def _duplicate_check(lib, task): """Check whether an album already exists in the library. Returns a list of Album objects (empty if no duplicates are found). @@ -193,7 +176,7 @@ def _save_state(state): try: with open(STATE_FILE, 'w') as f: pickle.dump(state, f) - except IOError, exc: + except IOError as exc: log.error(u'state file could not be written: %s' % unicode(exc)) @@ -259,11 +242,11 @@ class ImportConfig(object): then never touched again. """ _fields = ['lib', 'paths', 'resume', 'logfile', 'color', 'quiet', - 'quiet_fallback', 'copy', 'write', 'art', 'delete', + 'quiet_fallback', 'copy', 'move', 'write', 'delete', 'choose_match_func', 'should_resume_func', 'threaded', 'autot', 'singletons', 'timid', 'choose_item_func', 'query', 'incremental', 'ignore', - 'resolve_duplicate_func'] + 'resolve_duplicate_func', 'per_disc_numbering'] def __init__(self, **kwargs): for slot in self._fields: setattr(self, slot, kwargs[slot]) @@ -283,6 +266,14 @@ class ImportConfig(object): self.resume = False self.incremental = False + # Copy and move are mutually exclusive. + if self.move: + self.copy = False + + # Only delete when copying. + if not self.copy: + self.delete = False + # The importer task class. @@ -296,6 +287,7 @@ class ImportTask(object): self.items = items self.sentinel = False self.remove_duplicates = False + self.is_album = True @classmethod def done_sentinel(cls, toppath): @@ -324,56 +316,50 @@ class ImportTask(object): obj.is_album = False return obj - def set_match(self, cur_artist, cur_album, candidates, rec): + def set_candidates(self, cur_artist, cur_album, candidates, rec): """Sets the candidates for this album matched by the `autotag.tag_album` method. 
""" + assert self.is_album assert not self.sentinel self.cur_artist = cur_artist self.cur_album = cur_album self.candidates = candidates self.rec = rec - self.is_album = True - def set_null_match(self): + def set_null_candidates(self): """Set the candidates to indicate no album match was found. """ - self.set_match(None, None, None, None) + self.cur_artist = None + self.cur_album = None + self.candidates = None + self.rec = None - def set_item_match(self, candidates, rec): + def set_item_candidates(self, candidates, rec): """Set the match for a single-item task.""" assert not self.is_album assert self.item is not None - self.item_match = (candidates, rec) - - def set_null_item_match(self): - """For single-item tasks, mark the item as having no matches. - """ - assert not self.is_album - assert self.item is not None - self.item_match = None + self.candidates = candidates + self.rec = rec def set_choice(self, choice): - """Given either an (info, items) tuple or an action constant, - indicates that an action has been selected by the user (or - automatically). + """Given an AlbumMatch or TrackMatch object or an action constant, + indicates that an action has been selected for this task. """ assert not self.sentinel # Not part of the task structure: assert choice not in (action.MANUAL, action.MANUAL_ID) - assert choice != action.APPLY # Only used internally. + assert choice != action.APPLY # Only used internally. if choice in (action.SKIP, action.ASIS, action.TRACKS): self.choice_flag = choice - self.info = None + self.match = None else: - assert not isinstance(choice, action) if self.is_album: - info, items = choice - self.items = items # Reordered items list. + assert isinstance(choice, autotag.AlbumMatch) else: - info = choice - self.info = info - self.choice_flag = action.APPLY # Implicit choice. + assert isinstance(choice, autotag.TrackMatch) + self.choice_flag = action.APPLY # Implicit choice. 
+ self.match = choice def save_progress(self): """Updates the progress state to indicate that this album has @@ -393,7 +379,9 @@ class ImportTask(object): if self.sentinel or self.is_album: history_add(self.path) + # Logical decisions. + def should_write_tags(self): """Should new info be written to the files' metadata?""" if self.choice_flag == action.APPLY: @@ -402,16 +390,16 @@ class ImportTask(object): return False else: assert False - def should_fetch_art(self): - """Should album art be downloaded for this album?""" - return self.should_write_tags() and self.is_album + def should_skip(self): """After a choice has been made, returns True if this is a sentinel or it has been marked for skipping. """ return self.sentinel or self.choice_flag == action.SKIP - # Useful data. + + # Convenient data. + def chosen_ident(self): """Returns identifying metadata about the current choice. For albums, this is an (artist, album) pair. For items, this is @@ -424,12 +412,41 @@ class ImportTask(object): if self.choice_flag is action.ASIS: return (self.cur_artist, self.cur_album) elif self.choice_flag is action.APPLY: - return (self.info.artist, self.info.album) + return (self.match.info.artist, self.match.info.album) else: if self.choice_flag is action.ASIS: return (self.item.artist, self.item.title) elif self.choice_flag is action.APPLY: - return (self.info.artist, self.info.title) + return (self.match.info.artist, self.match.info.title) + + def imported_items(self): + """Return a list of Items that should be added to the library. + If this is an album task, return the list of items in the + selected match or everything if the choice is ASIS. If this is a + singleton task, return a list containing the item. + """ + if self.is_album: + if self.choice_flag == action.ASIS: + return list(self.items) + elif self.choice_flag == action.APPLY: + return self.match.mapping.keys() + else: + assert False + else: + return [self.item] + + + # Utilities. 
+ + def prune(self, filename): + """Prune any empty directories above the given file. If this + task has no `toppath` or the file path provided is not within + the `toppath`, then this function has no effect. Similarly, if + the file still exists, no pruning is performed, so it's safe to + call when the file in question may not have been removed. + """ + if self.toppath and not os.path.exists(filename): + util.prune_dirs(os.path.dirname(filename), self.toppath) # Full-album pipeline stages. @@ -464,14 +481,14 @@ def read_tasks(config): if config.incremental: incremental_skipped = 0 history_dirs = history_get() - + for toppath in config.paths: # Check whether the path is to a file. if config.singletons and not os.path.isdir(syspath(toppath)): item = library.Item.from_path(toppath) yield ImportTask.item_task(item) continue - + # Produce paths under this directory. if progress: resume_dir = resume_dirs.get(toppath) @@ -513,16 +530,14 @@ def query_tasks(config): Instead of finding files from the filesystem, a query is used to match items from the library. """ - lib = _reopen_lib(config.lib) - if config.singletons: # Search for items. - for item in lib.items(config.query): + for item in config.lib.items(config.query): yield ImportTask.item_task(item) else: # Search for albums. 
- for album in lib.albums(config.query): + for album in config.lib.albums(config.query): log.debug('yielding album %i: %s - %s' % (album.id, album.albumartist, album.album)) items = list(album.items()) @@ -540,11 +555,13 @@ def initial_lookup(config): if task.sentinel: continue + plugins.send('import_task_start', task=task, config=config) + log.debug('Looking up: %s' % task.path) try: - task.set_match(*autotag.tag_album(task.items, config.timid)) + task.set_candidates(*autotag.tag_album(task.items, config.timid)) except autotag.AutotagError: - task.set_null_match() + task.set_null_candidates() def user_query(config): """A coroutine for interfacing with the user about the tagging @@ -552,18 +569,18 @@ def user_query(config): a file-like object for logging the import process. The coroutine accepts and yields ImportTask objects. """ - lib = _reopen_lib(config.lib) recent = set() task = None while True: task = yield task if task.sentinel: continue - + # Ask the user for a choice. choice = config.choose_match_func(task, config) task.set_choice(choice) log_choice(config, task) + plugins.send('import_task_choice', task=task, config=config) # As-tracks: transition to singleton workflow. if choice is action.TRACKS: @@ -577,7 +594,7 @@ def user_query(config): while True: item_task = yield item_tasks.append(item_task) - ipl = pipeline.Pipeline((emitter(), item_lookup(config), + ipl = pipeline.Pipeline((emitter(), item_lookup(config), item_query(config), collector())) ipl.run_sequential() task = pipeline.multiple(item_tasks) @@ -589,7 +606,7 @@ def user_query(config): # The "recent" set keeps track of identifiers for recently # imported albums -- those that haven't reached the database # yet. 
- if ident in recent or _duplicate_check(lib, task): + if ident in recent or _duplicate_check(config.lib, task): config.resolve_duplicate_func(task, config) log_choice(config, task, True) recent.add(ident) @@ -608,21 +625,20 @@ def show_progress(config): log.info(task.path) # Behave as if ASIS were selected. - task.set_null_match() + task.set_null_candidates() task.set_choice(action.ASIS) - + def apply_choices(config): - """A coroutine for applying changes to albums during the autotag - process. + """A coroutine for applying changes to albums and singletons during + the autotag process. """ - lib = _reopen_lib(config.lib) task = None - while True: + while True: task = yield task if task.should_skip(): continue - items = [i for i in task.items if i] if task.is_album else [task.item] + items = task.imported_items() # Clear IDs in case the items are being re-tagged. for item in items: item.id = None @@ -631,9 +647,13 @@ def apply_choices(config): # Change metadata. if task.should_write_tags(): if task.is_album: - autotag.apply_metadata(task.items, task.info) + autotag.apply_metadata( + task.match.info, task.match.mapping, + per_disc_numbering=config.per_disc_numbering + ) else: - autotag.apply_item_metadata(task.item, task.info) + autotag.apply_item_metadata(task.item, task.match.info) + plugins.send('import_task_apply', config=config, task=task) # Infer album-level fields. if task.is_album: @@ -642,14 +662,14 @@ def apply_choices(config): # Find existing item entries that these are replacing (for # re-imports). Old album structures are automatically cleaned up # when the last item is removed. 
- replaced_items = defaultdict(list) + task.replaced_items = defaultdict(list) for item in items: - dup_items = lib.items(library.MatchQuery('path', item.path)) + dup_items = config.lib.items(library.MatchQuery('path', item.path)) for dup_item in dup_items: - replaced_items[item].append(dup_item) + task.replaced_items[item].append(dup_item) log.debug('replacing item %i: %s' % (dup_item.id, displayable_path(item.path))) - log.debug('%i of %i items replaced' % (len(replaced_items), + log.debug('%i of %i items replaced' % (len(task.replaced_items), len(items))) # Find old items that should be replaced as part of a duplicate @@ -657,93 +677,111 @@ def apply_choices(config): duplicate_items = [] if task.remove_duplicates: if task.is_album: - for album in _duplicate_check(lib, task): + for album in _duplicate_check(config.lib, task): duplicate_items += album.items() else: - duplicate_items = _item_duplicate_check(lib, task) + duplicate_items = _item_duplicate_check(config.lib, task) log.debug('removing %i old duplicated items' % len(duplicate_items)) # Delete duplicate files that are located inside the library # directory. for duplicate_path in [i.path for i in duplicate_items]: - if lib.directory in util.ancestry(duplicate_path): + if config.lib.directory in util.ancestry(duplicate_path): log.debug(u'deleting replaced duplicate %s' % util.displayable_path(duplicate_path)) - util.soft_remove(duplicate_path) + util.remove(duplicate_path) util.prune_dirs(os.path.dirname(duplicate_path), - lib.directory) + config.lib.directory) - # Move/copy files. - task.old_paths = [item.path for item in items] - for item in items: - if config.copy: - # If we're replacing an item, then move rather than - # copying. - old_path = item.path - do_copy = not bool(replaced_items[item]) - lib.move(item, do_copy, task.is_album) - if not do_copy: - # If we moved the item, remove the now-nonexistent - # file from old_paths. 
- task.old_paths.remove(old_path) - if config.write and task.should_write_tags(): - item.write() - - # Add items to library. We consolidate this at the end to avoid - # locking while we do the copying and tag updates. - try: + # Add items -- before path changes -- to the library. We add the + # items now (rather than at the end) so that album structures + # are in place before calls to destination(). + with config.lib.transaction(): # Remove old items. - for replaced in replaced_items.itervalues(): + for replaced in task.replaced_items.itervalues(): for item in replaced: - lib.remove(item) + config.lib.remove(item) for item in duplicate_items: - lib.remove(item) + config.lib.remove(item) # Add new ones. if task.is_album: # Add an album. - album = lib.add_album(items) + album = config.lib.add_album(items) task.album_id = album.id else: # Add tracks. for item in items: - lib.add(item) - finally: - lib.save() + config.lib.add(item) -def fetch_art(config): - """A coroutine that fetches and applies album art for albums where - appropriate. +def plugin_stage(config, func): + """A coroutine (pipeline stage) that calls the given function with + each non-skipped import task. These stages occur between applying + metadata changes and moving/copying/writing files. + """ + task = None + while True: + task = yield task + if task.should_skip(): + continue + func(config, task) + +def manipulate_files(config): + """A coroutine (pipeline stage) that performs necessary file + manipulations *after* items have been added to the library. """ - lib = _reopen_lib(config.lib) task = None while True: task = yield task if task.should_skip(): continue - if task.should_fetch_art(): - artpath = lib.beets.autotag.art.art_for_album(task.info, task.path) + # Move/copy files. + items = task.imported_items() + task.old_paths = [item.path for item in items] # For deletion. + for item in items: + if config.move: + # Just move the file. 
+ old_path = item.path + config.lib.move(item, False) + task.prune(old_path) + elif config.copy: + # If it's a reimport, move in-library files and copy + # out-of-library files. Otherwise, copy and keep track + # of the old path. + old_path = item.path + if task.replaced_items[item]: + # This is a reimport. Move in-library files and copy + # out-of-library files. + if config.lib.directory in util.ancestry(old_path): + config.lib.move(item, False) + # We moved the item, so remove the + # now-nonexistent file from old_paths. + task.old_paths.remove(old_path) + else: + config.lib.move(item, True) + else: + # A normal import. Just copy files and keep track of + # old paths. + config.lib.move(item, True) - # Save the art if any was found. - if artpath: - try: - album = lib.get_album(task.album_id) - album.set_art(artpath) - if config.delete and not util.samefile(artpath, - album.artpath): - # Delete the original file after it's imported. - os.remove(artpath) - finally: - lib.save(False) + if config.write and task.should_write_tags(): + item.write() + + # Save new paths. + with config.lib.transaction(): + for item in items: + config.lib.store(item) + + # Plugin event. + plugins.send('import_task_files', config=config, task=task) def finalize(config): """A coroutine that finishes up importer tasks. In particular, the coroutine sends plugin events, deletes old files, and saves progress. This is a "terminal" coroutine (it yields None). """ - lib = _reopen_lib(config.lib) while True: task = yield if task.should_skip(): @@ -753,15 +791,17 @@ def finalize(config): task.save_history() continue - items = [i for i in task.items if i] if task.is_album else [task.item] + items = task.imported_items() # Announce that we've added an album. 
if task.is_album: - album = lib.get_album(task.album_id) - plugins.send('album_imported', lib=lib, album=album, config=config) + album = config.lib.get_album(task.album_id) + plugins.send('album_imported', + lib=config.lib, album=album, config=config) else: for item in items: - plugins.send('item_imported', lib=lib, item=item, config=config) + plugins.send('item_imported', + lib=config.lib, item=item, config=config) # Finally, delete old files. if config.copy and config.delete: @@ -769,11 +809,8 @@ def finalize(config): for old_path in task.old_paths: # Only delete files that were actually copied. if old_path not in new_paths: - os.remove(syspath(old_path)) - # Clean up directory if it is emptied. - if task.toppath: - util.prune_dirs(os.path.dirname(old_path), - task.toppath) + util.remove(syspath(old_path), False) + task.prune(old_path) # Update progress. if config.resume is not False: @@ -794,13 +831,14 @@ def item_lookup(config): if task.sentinel: continue - task.set_item_match(*autotag.tag_item(task.item, config.timid)) + plugins.send('import_task_start', task=task, config=config) + + task.set_item_candidates(*autotag.tag_item(task.item, config.timid)) def item_query(config): """A coroutine that queries the user for input on single-item lookups. """ - lib = _reopen_lib(config.lib) task = None recent = set() while True: @@ -811,11 +849,12 @@ def item_query(config): choice = config.choose_item_func(task, config) task.set_choice(choice) log_choice(config, task) + plugins.send('import_task_choice', task=task, config=config) # Duplicate check. 
if task.choice_flag in (action.ASIS, action.APPLY): ident = task.chosen_ident() - if ident in recent or _item_duplicate_check(lib, task): + if ident in recent or _item_duplicate_check(config.lib, task): config.resolve_duplicate_func(task, config) log_choice(config, task, True) recent.add(ident) @@ -832,7 +871,7 @@ def item_progress(config): continue log.info(displayable_path(task.item.path)) - task.set_null_item_match() + task.set_null_candidates() task.set_choice(action.ASIS) @@ -843,7 +882,7 @@ def run_import(**kwargs): ImportConfig. """ config = ImportConfig(**kwargs) - + # Set up the pipeline. if config.query is None: stages = [read_tasks(config)] @@ -864,8 +903,9 @@ def run_import(**kwargs): # When not autotagging, just display progress. stages += [show_progress(config)] stages += [apply_choices(config)] - if config.art: - stages += [fetch_art(config)] + for stage_func in plugins.import_stages(): + stages.append(plugin_stage(config, stage_func)) + stages += [manipulate_files(config)] stages += [finalize(config)] pl = pipeline.Pipeline(stages) diff --git a/lib/beets/library.py b/lib/beets/library.py index 97e7c865..31be2460 100644 --- a/lib/beets/library.py +++ b/lib/beets/library.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2011, Adrian Sampson. +# Copyright 2012, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -8,17 +8,23 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. +"""The core data store and collection logic for beets. 
+""" import sqlite3 import os import re import sys import logging import shlex -#from unidecode import unidecode +import unicodedata +import threading +import contextlib +from collections import defaultdict +# from unidecode import unidecode from lib.beets.mediafile import MediaFile from lib.beets import plugins from lib.beets import util @@ -40,30 +46,47 @@ ITEM_FIELDS = [ ('path', 'blob', False, False), ('album_id', 'int', False, False), - ('title', 'text', True, True), - ('artist', 'text', True, True), - ('album', 'text', True, True), - ('albumartist', 'text', True, True), - ('genre', 'text', True, True), - ('composer', 'text', True, True), - ('grouping', 'text', True, True), - ('year', 'int', True, True), - ('month', 'int', True, True), - ('day', 'int', True, True), - ('track', 'int', True, True), - ('tracktotal', 'int', True, True), - ('disc', 'int', True, True), - ('disctotal', 'int', True, True), - ('lyrics', 'text', True, True), - ('comments', 'text', True, True), - ('bpm', 'int', True, True), - ('comp', 'bool', True, True), - ('mb_trackid', 'text', True, True), - ('mb_albumid', 'text', True, True), - ('mb_artistid', 'text', True, True), - ('mb_albumartistid', 'text', True, True), - ('albumtype', 'text', True, True), - ('label', 'text', True, True), + ('title', 'text', True, True), + ('artist', 'text', True, True), + ('artist_sort', 'text', True, True), + ('artist_credit', 'text', True, True), + ('album', 'text', True, True), + ('albumartist', 'text', True, True), + ('albumartist_sort', 'text', True, True), + ('albumartist_credit', 'text', True, True), + ('genre', 'text', True, True), + ('composer', 'text', True, True), + ('grouping', 'text', True, True), + ('year', 'int', True, True), + ('month', 'int', True, True), + ('day', 'int', True, True), + ('track', 'int', True, True), + ('tracktotal', 'int', True, True), + ('disc', 'int', True, True), + ('disctotal', 'int', True, True), + ('lyrics', 'text', True, True), + ('comments', 'text', True, True), + 
('bpm', 'int', True, True), + ('comp', 'bool', True, True), + ('mb_trackid', 'text', True, True), + ('mb_albumid', 'text', True, True), + ('mb_artistid', 'text', True, True), + ('mb_albumartistid', 'text', True, True), + ('albumtype', 'text', True, True), + ('label', 'text', True, True), + ('acoustid_fingerprint', 'text', True, True), + ('acoustid_id', 'text', True, True), + ('mb_releasegroupid', 'text', True, True), + ('asin', 'text', True, True), + ('catalognum', 'text', True, True), + ('script', 'text', True, True), + ('language', 'text', True, True), + ('country', 'text', True, True), + ('albumstatus', 'text', True, True), + ('media', 'text', True, True), + ('albumdisambig', 'text', True, True), + ('disctitle', 'text', True, True), + ('encoder', 'text', True, True), ('length', 'real', False, True), ('bitrate', 'int', False, True), @@ -84,19 +107,30 @@ ALBUM_FIELDS = [ ('id', 'integer primary key', False), ('artpath', 'blob', False), - ('albumartist', 'text', True), - ('album', 'text', True), - ('genre', 'text', True), - ('year', 'int', True), - ('month', 'int', True), - ('day', 'int', True), - ('tracktotal', 'int', True), - ('disctotal', 'int', True), - ('comp', 'bool', True), - ('mb_albumid', 'text', True), - ('mb_albumartistid', 'text', True), - ('albumtype', 'text', True), - ('label', 'text', True), + ('albumartist', 'text', True), + ('albumartist_sort', 'text', True), + ('albumartist_credit', 'text', True, True), + ('album', 'text', True), + ('genre', 'text', True), + ('year', 'int', True), + ('month', 'int', True), + ('day', 'int', True), + ('tracktotal', 'int', True), + ('disctotal', 'int', True), + ('comp', 'bool', True), + ('mb_albumid', 'text', True), + ('mb_albumartistid', 'text', True), + ('albumtype', 'text', True), + ('label', 'text', True), + ('mb_releasegroupid', 'text', True), + ('asin', 'text', True), + ('catalognum', 'text', True), + ('script', 'text', True), + ('language', 'text', True), + ('country', 'text', True), + ('albumstatus', 'text', 
True), + ('media', 'text', True), + ('albumdisambig', 'text', True), ] ALBUM_KEYS = [f[0] for f in ALBUM_FIELDS] ALBUM_KEYS_ITEM = [f[0] for f in ALBUM_FIELDS if f[2]] @@ -110,11 +144,37 @@ ITEM_DEFAULT_FIELDS = ARTIST_DEFAULT_FIELDS + ALBUM_DEFAULT_FIELDS + \ # Special path format key. PF_KEY_DEFAULT = 'default' + # Logger. log = logging.getLogger('beets') if not log.handlers: log.addHandler(logging.StreamHandler()) +# A little SQL utility. +def _orelse(exp1, exp2): + """Generates an SQLite expression that evaluates to exp1 if exp1 is + non-null and non-empty or exp2 otherwise. + """ + return ('(CASE {0} WHEN NULL THEN {1} ' + 'WHEN "" THEN {1} ' + 'ELSE {0} END)').format(exp1, exp2) + +# An SQLite function for regular expression matching. +def _regexp(expr, val): + """Return a boolean indicating whether the regular expression `expr` + matches `val`. + """ + if val is None or expr is None: + return False + if not isinstance(val, basestring): + val = unicode(val) + try: + res = re.search(expr, val) + except re.error: + # Invalid regular expression. + return False + return res is not None + # Exceptions. @@ -129,7 +189,7 @@ class Item(object): self.dirty = {} self._fill_record(values) self._clear_dirty() - + @classmethod def from_path(cls, path): """Creates a new item from the media file at the specified path. @@ -139,7 +199,7 @@ class Item(object): 'album_id': None, }) i.read(path) - i.mtime = i.current_mtime() # Initial mtime. + i.mtime = i.current_mtime() # Initial mtime. return i def _fill_record(self, values): @@ -175,7 +235,7 @@ class Item(object): sets the record entry for that key to value. Note that to change the attribute in the database or in the file's tags, one must call store() or write(). - + Otherwise, performs an ordinary setattr. """ # Encode unicode paths and read buffers. @@ -187,17 +247,17 @@ class Item(object): if key in ITEM_KEYS: # If the value changed, mark the field as dirty. 
- if (not (key in self.record)) or (self.record[key] != value): + if (key not in self.record) or (self.record[key] != value): self.record[key] = value self.dirty[key] = True if key in ITEM_KEYS_WRITABLE: self.mtime = 0 # Reset mtime on dirty. else: super(Item, self).__setattr__(key, value) - - + + # Interaction with file metadata. - + def read(self, read_path=None): """Read the metadata from the associated file. If read_path is specified, read metadata from that file instead. @@ -215,7 +275,7 @@ class Item(object): # Database's mtime should now reflect the on-disk value. if read_path == self.path: self.mtime = self.current_mtime() - + def write(self): """Writes the item's metadata to the associated file. """ @@ -242,7 +302,7 @@ class Item(object): util.copy(self.path, dest) else: util.move(self.path, dest) - + # Either copying or moving succeeded, so update the stored path. self.path = dest @@ -253,6 +313,57 @@ class Item(object): return int(os.path.getmtime(syspath(self.path))) + # Templating. + + def evaluate_template(self, template, lib=None, sanitize=False, + pathmod=None): + """Evaluates a Template object using the item's fields. If `lib` + is provided, it is used to map some fields to the item's album + (if available) and is made available to template functions. If + `sanitize`, then each value will be sanitized for inclusion in a + file path. + """ + pathmod = pathmod or os.path + + # Get the item's Album if it has one. + album = lib.get_album(self) + + # Build the mapping for substitution in the template, + # beginning with the values from the database. + mapping = {} + for key in ITEM_KEYS_META: + # Get the values from either the item or its album. + if key in ALBUM_KEYS_ITEM and album is not None: + # From album. + value = getattr(album, key) + else: + # From Item. 
+ value = getattr(self, key) + if sanitize: + value = util.sanitize_for_path(value, pathmod, key) + mapping[key] = value + + # Use the album artist if the track artist is not set and + # vice-versa. + if not mapping['artist']: + mapping['artist'] = mapping['albumartist'] + if not mapping['albumartist']: + mapping['albumartist'] = mapping['artist'] + + # Get values from plugins. + for key, value in plugins.template_values(self).iteritems(): + if sanitize: + value = util.sanitize_for_path(value, pathmod, key) + mapping[key] = value + + # Get template functions. + funcs = DefaultTemplateFunctions(self, lib, pathmod).functions() + funcs.update(plugins.template_funcs()) + + # Perform substitution. + return template.substitute(mapping, funcs) + + # Library queries. class Query(object): @@ -279,16 +390,14 @@ class Query(object): clause, subvals = self.clause() return ('SELECT ' + columns + ' FROM items WHERE ' + clause, subvals) - def count(self, library): + def count(self, tx): """Returns `(num, length)` where `num` is the number of items in the library matching this query and `length` is their total length in seconds. 
""" clause, subvals = self.clause() statement = 'SELECT COUNT(id), SUM(length) FROM items WHERE ' + clause - c = library.conn.execute(statement, subvals) - result = c.fetchone() - c.close() + result = tx.query(statement, subvals)[0] return (result[0], result[1] or 0.0) class FieldQuery(Query): @@ -300,7 +409,7 @@ class FieldQuery(Query): raise InvalidFieldError(field + ' is not an item key') self.field = field self.pattern = pattern - + class MatchQuery(FieldQuery): """A query that looks for exact matches in an item field.""" def clause(self): @@ -325,6 +434,21 @@ class SubstringQuery(FieldQuery): value = getattr(item, self.field) or '' return self.pattern.lower() in value.lower() +class RegexpQuery(FieldQuery): + """A query that matches a regular expression in a specific item field.""" + def __init__(self, field, pattern): + super(RegexpQuery, self).__init__(field, pattern) + self.regexp = re.compile(pattern) + + def clause(self): + clause = self.field + " REGEXP ?" + subvals = [self.pattern] + return clause, subvals + + def match(self, item): + value = getattr(item, self.field) or '' + return self.regexp.search(value) is not None + class BooleanQuery(MatchQuery): """Matches a boolean field. Pattern should either be a boolean or a string reflecting a boolean. @@ -355,7 +479,7 @@ class CollectionQuery(Query): """ def __init__(self, subqueries=()): self.subqueries = subqueries - + # is there a better way to do this? def __len__(self): return len(self.subqueries) def __getitem__(self, key): return self.subqueries[key] @@ -374,24 +498,34 @@ class CollectionQuery(Query): subvals += subq_subvals clause = (' ' + joiner + ' ').join(clause_parts) return clause, subvals - - # regular expression for _parse_query_part, below - _pq_regex = re.compile(# non-grouping optional segment for the keyword - r'(?:' - r'(\S+?)' # the keyword - r'(? %s' % (field, oldval, newval)) +# fields: Shows a list of available fields for queries and format strings. 
+fields_cmd = ui.Subcommand('fields', + help='show fields available for queries and format strings') +def fields_func(lib, config, opts, args): + print("Available item fields:") + print(" " + "\n ".join([key for key in library.ITEM_KEYS])) + print("\nAvailable album fields:") + print(" " + "\n ".join([key for key in library.ALBUM_KEYS])) + +fields_cmd.func = fields_func +default_commands.append(fields_cmd) + + # import: Autotagger and importer. DEFAULT_IMPORT_COPY = True +DEFAULT_IMPORT_MOVE = False DEFAULT_IMPORT_WRITE = True DEFAULT_IMPORT_DELETE = False DEFAULT_IMPORT_AUTOT = True DEFAULT_IMPORT_TIMID = False -DEFAULT_IMPORT_ART = True DEFAULT_IMPORT_QUIET = False DEFAULT_IMPORT_QUIET_FALLBACK = 'skip' DEFAULT_IMPORT_RESUME = None # "ask" @@ -101,6 +114,7 @@ DEFAULT_COLOR = True DEFAULT_IGNORE = [ '.*', '*~', ] +DEFAULT_PER_DISC_NUMBERING = False VARIOUS_ARTISTS = u'Various Artists' @@ -122,10 +136,11 @@ def dist_string(dist, color): out = ui.colorize('red', out) return out -def show_change(cur_artist, cur_album, items, info, dist, color=True): - """Print out a representation of the changes that will be made if - tags are changed from (cur_artist, cur_album, items) to info with - distance dist. +def show_change(cur_artist, cur_album, match, color=True, + per_disc_numbering=False): + """Print out a representation of the changes that will be made if an + album's tags are changed according to `match`, which must be an AlbumMatch + object. """ def show_album(artist, album, partial=False): if artist: @@ -148,14 +163,25 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): out += u' ' + warning print_(out) - # Record if the match is partial or not. - partial_match = None in items + def format_index(track_info): + """Return a string representing the track index of the given + TrackInfo object. 
+ """ + if per_disc_numbering: + if match.info.mediums > 1: + return u'{0}-{1}'.format(track_info.medium, + track_info.medium_index) + else: + return unicode(track_info.medium_index) + else: + return unicode(track_info.index) # Identify the album in question. - if cur_artist != info.artist or \ - (cur_album != info.album and info.album != VARIOUS_ARTISTS): - artist_l, artist_r = cur_artist or '', info.artist - album_l, album_r = cur_album or '', info.album + if cur_artist != match.info.artist or \ + (cur_album != match.info.album and + match.info.album != VARIOUS_ARTISTS): + artist_l, artist_r = cur_artist or '', match.info.artist + album_l, album_r = cur_album or '', match.info.album if artist_r == VARIOUS_ARTISTS: # Hide artists for VA releases. artist_l, artist_r = u'', u'' @@ -169,8 +195,8 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): print_("To:") show_album(artist_r, album_r) else: - message = u"Tagging: %s - %s" % (info.artist, info.album) - if partial_match: + message = u"Tagging: %s - %s" % (match.info.artist, match.info.album) + if match.extra_items or match.extra_tracks: warning = PARTIAL_MATCH_MESSAGE if color: warning = ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) @@ -178,18 +204,17 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): print_(message) # Distance/similarity. - print_('(Similarity: %s)' % dist_string(dist, color)) + print_('(Similarity: %s)' % dist_string(match.distance, color)) # Tracks. - missing_tracks = [] - for i, (item, track_info) in enumerate(zip(items, info.tracks)): - if not item: - missing_tracks.append((i, track_info)) - continue - + pairs = match.mapping.items() + pairs.sort(key=lambda (_, track_info): track_info.index) + for item, track_info in pairs: # Get displayable LHS and RHS values. 
cur_track = unicode(item.track) - new_track = unicode(i+1) + new_track = format_index(track_info) + tracks_differ = item.track not in (track_info.index, + track_info.medium_index) cur_title = item.title new_title = track_info.title if item.length and track_info.length: @@ -198,48 +223,55 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): if color: cur_length = ui.colorize('red', cur_length) new_length = ui.colorize('red', new_length) - + # Possibly colorize changes. if color: cur_title, new_title = ui.colordiff(cur_title, new_title) - if cur_track != new_track: - cur_track = ui.colorize('red', cur_track) - new_track = ui.colorize('red', new_track) + cur_track = ui.colorize('red', cur_track) + new_track = ui.colorize('red', new_track) # Show filename (non-colorized) when title is not set. if not item.title.strip(): cur_title = displayable_path(os.path.basename(item.path)) - + if cur_title != new_title: lhs, rhs = cur_title, new_title - if cur_track != new_track: + if tracks_differ: lhs += u' (%s)' % cur_track rhs += u' (%s)' % new_track print_(u" * %s -> %s" % (lhs, rhs)) else: line = u' * %s' % item.title display = False - if cur_track != new_track: + if tracks_differ: display = True line += u' (%s -> %s)' % (cur_track, new_track) if item.length and track_info.length and \ abs(item.length - track_info.length) > 2.0: display = True - line += u' (%s -> %s)' % (cur_length, new_length) + line += u' (%s vs. %s)' % (cur_length, new_length) if display: print_(line) - for i, track_info in missing_tracks: - line = u' * Missing track: %s (%d)' % (track_info.title, i+1) + + # Missing and unmatched tracks. 
+ for track_info in match.extra_tracks: + line = u' * Missing track: {0} ({1})'.format(track_info.title, + format_index(track_info)) + if color: + line = ui.colorize('yellow', line) + print_(line) + for item in match.extra_items: + line = u' * Unmatched track: {0} ({1})'.format(item.title, item.track) if color: line = ui.colorize('yellow', line) print_(line) -def show_item_change(item, info, dist, color): +def show_item_change(item, match, color): """Print out the change that would occur by tagging `item` with the - metadata from `info`. + metadata from `match`, a TrackMatch object. """ - cur_artist, new_artist = item.artist, info.artist - cur_title, new_title = item.title, info.title + cur_artist, new_artist = item.artist, match.info.artist + cur_title, new_title = item.title, match.info.title if cur_artist != new_artist or cur_title != new_title: if color: @@ -254,7 +286,7 @@ def show_item_change(item, info, dist, color): else: print_("Tagging track: %s - %s" % (cur_artist, cur_title)) - print_('(Similarity: %s)' % dist_string(dist, color)) + print_('(Similarity: %s)' % dist_string(match.distance, color)) def should_resume(config, path): return ui.input_yn("Import of the directory:\n%s" @@ -273,17 +305,17 @@ def _quiet_fall_back(config): return config.quiet_fallback def choose_candidate(candidates, singleton, rec, color, timid, - cur_artist=None, cur_album=None, item=None): + cur_artist=None, cur_album=None, item=None, + itemcount=None, per_disc_numbering=False): """Given a sorted list of candidates, ask the user for a selection - of which candidate to use. Applies to both full albums and - singletons (tracks). For albums, the candidates are `(dist, items, - info)` triples and `cur_artist` and `cur_album` must be provided. - For singletons, the candidates are `(dist, info)` pairs and `item` - must be provided. + of which candidate to use. Applies to both full albums and + singletons (tracks). 
Candidates are either AlbumMatch or TrackMatch + objects depending on `singleton`. for albums, `cur_artist`, + `cur_album`, and `itemcount` must be provided. For singletons, + `item` must be provided. Returns the result of the choice, which may SKIP, ASIS, TRACKS, or - MANUAL or a candidate. For albums, a candidate is a `(info, items)` - pair; for items, it is just a TrackInfo object. + MANUAL or a candidate (an AlbumMatch/TrackMatch object). """ # Sanity check. if singleton: @@ -294,11 +326,15 @@ def choose_candidate(candidates, singleton, rec, color, timid, # Zero candidates. if not candidates: - print_("No match found.") if singleton: + print_("No matching recordings found.") opts = ('Use as-is', 'Skip', 'Enter search', 'enter Id', 'aBort') else: + print_("No matching release found for {0} tracks." + .format(itemcount)) + print_('For help, see: ' + 'https://github.com/sampsyo/beets/wiki/FAQ#wiki-nomatch') opts = ('Use as-is', 'as Tracks', 'Skip', 'Enter search', 'enter Id', 'aBort') sel = ui.input_options(opts, color=color) @@ -321,12 +357,9 @@ def choose_candidate(candidates, singleton, rec, color, timid, # Is the change good enough? bypass_candidates = False if rec != autotag.RECOMMEND_NONE: - if singleton: - dist, info = candidates[0] - else: - dist, items, info = candidates[0] + match = candidates[0] bypass_candidates = True - + while True: # Display and choose from candidates. if not bypass_candidates: @@ -335,22 +368,24 @@ def choose_candidate(candidates, singleton, rec, color, timid, print_('Finding tags for track "%s - %s".' % (item.artist, item.title)) print_('Candidates:') - for i, (dist, info) in enumerate(candidates): - print_('%i. %s - %s (%s)' % (i+1, info.artist, - info.title, dist_string(dist, color))) + for i, match in enumerate(candidates): + print_('%i. %s - %s (%s)' % + (i + 1, match.info.artist, match.info.title, + dist_string(match.distance, color))) else: print_('Finding tags for album "%s - %s".' 
% (cur_artist, cur_album)) print_('Candidates:') - for i, (dist, items, info) in enumerate(candidates): - line = '%i. %s - %s' % (i+1, info.artist, info.album) + for i, match in enumerate(candidates): + line = '%i. %s - %s' % (i + 1, match.info.artist, + match.info.album) # Label and year disambiguation, if available. label, year = None, None - if info.label: - label = info.label - if info.year: - year = unicode(info.year) + if match.info.label: + label = match.info.label + if match.info.year: + year = unicode(match.info.year) if label and year: line += u' [%s, %s]' % (label, year) elif label: @@ -358,17 +393,17 @@ def choose_candidate(candidates, singleton, rec, color, timid, elif year: line += u' [%s]' % year - line += ' (%s)' % dist_string(dist, color) + line += ' (%s)' % dist_string(match.distance, color) # Point out the partial matches. - if None in items: + if match.extra_items or match.extra_tracks: warning = PARTIAL_MATCH_MESSAGE if color: warning = ui.colorize('yellow', warning) line += u' %s' % warning print_(line) - + # Ask the user for a choice. if singleton: opts = ('Skip', 'Use as-is', 'Enter search', 'enter Id', @@ -391,26 +426,24 @@ def choose_candidate(candidates, singleton, rec, color, timid, raise importer.ImportAbort() elif sel == 'i': return importer.action.MANUAL_ID - else: # Numerical selection. + else: # Numerical selection. if singleton: - dist, info = candidates[sel-1] + match = candidates[sel - 1] else: - dist, items, info = candidates[sel-1] + match = candidates[sel - 1] bypass_candidates = False - + # Show what we're about to do. if singleton: - show_item_change(item, info, dist, color) + show_item_change(item, match, color) else: - show_change(cur_artist, cur_album, items, info, dist, color) - + show_change(cur_artist, cur_album, match, color, + per_disc_numbering) + # Exact match => tag automatically if we're not in timid mode. 
if rec == autotag.RECOMMEND_STRONG and not timid: - if singleton: - return info - else: - return info, items - + return match + # Ask for confirmation. if singleton: opts = ('Apply', 'More candidates', 'Skip', 'Use as-is', @@ -420,10 +453,7 @@ def choose_candidate(candidates, singleton, rec, color, timid, 'as Tracks', 'Enter search', 'enter Id', 'aBort') sel = ui.input_options(opts, color=color) if sel == 'a': - if singleton: - return info - else: - return info, items + return match elif sel == 'm': pass elif sel == 's': @@ -444,18 +474,17 @@ def manual_search(singleton): """Input either an artist and album (for full albums) or artist and track name (for singletons) for manual search. """ - artist = raw_input('Artist: ').decode(sys.stdin.encoding) - name = raw_input('Track: ' if singleton else 'Album: ') \ - .decode(sys.stdin.encoding) + artist = input_('Artist:') + name = input_('Track:' if singleton else 'Album:') return artist.strip(), name.strip() def manual_id(singleton): """Input a MusicBrainz ID, either for an album ("release") or a track ("recording"). If no valid ID is entered, returns None. """ - prompt = 'Enter MusicBrainz %s ID: ' % \ + prompt = 'Enter MusicBrainz %s ID:' % \ ('recording' if singleton else 'release') - entry = raw_input(prompt).decode(sys.stdin.encoding).strip() + entry = input_(prompt).strip() # Find the first thing that looks like a UUID/MBID. match = re.search('[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', entry) @@ -468,7 +497,7 @@ def manual_id(singleton): def choose_match(task, config): """Given an initial autotagging of items, go through an interactive dance with the user to ask for a choice of metadata. Returns an - (info, items) pair, ASIS, or SKIP. + AlbumMatch object, ASIS, or SKIP. """ # Show what we're tagging. print_() @@ -477,10 +506,9 @@ def choose_match(task, config): if config.quiet: # No input; just make a decision. 
if task.rec == autotag.RECOMMEND_STRONG: - dist, items, info = task.candidates[0] - show_change(task.cur_artist, task.cur_album, items, info, dist, - config.color) - return info, items + match = task.candidates[0] + show_change(task.cur_artist, task.cur_album, match, config.color) + return match else: return _quiet_fall_back(config) @@ -488,10 +516,11 @@ def choose_match(task, config): candidates, rec = task.candidates, task.rec while True: # Ask for a choice from the user. - choice = choose_candidate(candidates, False, rec, config.color, + choice = choose_candidate(candidates, False, rec, config.color, config.timid, task.cur_artist, - task.cur_album) - + task.cur_album, itemcount=len(task.items), + per_disc_numbering=config.per_disc_numbering) + # Choose which tags to use. if choice in (importer.action.SKIP, importer.action.ASIS, importer.action.TRACKS): @@ -517,25 +546,25 @@ def choose_match(task, config): except autotag.AutotagError: candidates, rec = None, None else: - # We have a candidate! Finish tagging. Here, choice is - # an (info, items) pair as desired. - assert not isinstance(choice, importer.action) + # We have a candidate! Finish tagging. Here, choice is an + # AlbumMatch object. + assert isinstance(choice, autotag.AlbumMatch) return choice def choose_item(task, config): """Ask the user for a choice about tagging a single item. Returns - either an action constant or a TrackInfo object. + either an action constant or a TrackMatch object. """ print_() print_(task.item.path) - candidates, rec = task.item_match + candidates, rec = task.candidates, task.rec if config.quiet: # Quiet mode; make a decision. 
if rec == autotag.RECOMMEND_STRONG: - dist, track_info = candidates[0] - show_item_change(task.item, track_info, dist, config.color) - return track_info + match = candidates[0] + show_item_change(task.item, match, config.color) + return match else: return _quiet_fall_back(config) @@ -558,10 +587,10 @@ def choose_item(task, config): search_id = manual_id(True) if search_id: candidates, rec = autotag.tag_item(task.item, config.timid, - search_id=search_id) + search_id=search_id) else: # Chose a candidate. - assert not isinstance(choice, importer.action) + assert isinstance(choice, autotag.TrackMatch) return choice def resolve_duplicate(task, config): @@ -595,30 +624,30 @@ def resolve_duplicate(task, config): # The import command. -def import_files(lib, paths, copy, write, autot, logpath, art, threaded, +def import_files(lib, paths, copy, move, write, autot, logpath, threaded, color, delete, quiet, resume, quiet_fallback, singletons, - timid, query, incremental, ignore): + timid, query, incremental, ignore, per_disc_numbering): """Import the files in the given list of paths, tagging each leaf - directory as an album. If copy, then the files are copied into - the library folder. If write, then new metadata is written to the - files themselves. If not autot, then just import the files - without attempting to tag. If logpath is provided, then untaggable - albums will be logged there. If art, then attempt to download - cover art for each album. If threaded, then accelerate autotagging + directory as an album. If copy, then the files are copied into the + library folder. If write, then new metadata is written to the files + themselves. If not autot, then just import the files without + attempting to tag. If logpath is provided, then untaggable albums + will be logged there. If threaded, then accelerate autotagging imports by running them in multiple threads. If color, then ANSI-colorize some terminal output. If delete, then old files are - deleted when they are copied. 
If quiet, then the user is - never prompted for input; instead, the tagger just skips anything - it is not confident about. resume indicates whether interrupted - imports can be resumed and is either a boolean or None. - quiet_fallback should be either ASIS or SKIP and indicates what - should happen in quiet mode when the recommendation is not strong. + deleted when they are copied. If quiet, then the user is never + prompted for input; instead, the tagger just skips anything it is + not confident about. resume indicates whether interrupted imports + can be resumed and is either a boolean or None. quiet_fallback + should be either ASIS or SKIP and indicates what should happen in + quiet mode when the recommendation is not strong. """ # Check the user-specified directories. for path in paths: - if not singletons and not os.path.isdir(syspath(path)): + fullpath = syspath(normpath(path)) + if not singletons and not os.path.isdir(fullpath): raise ui.UserError('not a directory: ' + path) - elif singletons and not os.path.exists(syspath(path)): + elif singletons and not os.path.exists(fullpath): raise ui.UserError('no such file: ' + path) # Check parameter consistency. 
@@ -633,7 +662,7 @@ def import_files(lib, paths, copy, write, autot, logpath, art, threaded, except IOError: raise ui.UserError(u"could not open log file for writing: %s" % displayable_path(logpath)) - print >>logfile, 'import started', time.asctime() + print('import started', time.asctime(), file=logfile) else: logfile = None @@ -652,8 +681,8 @@ def import_files(lib, paths, copy, write, autot, logpath, art, threaded, quiet = quiet, quiet_fallback = quiet_fallback, copy = copy, + move = move, write = write, - art = art, delete = delete, threaded = threaded, autot = autot, @@ -666,12 +695,13 @@ def import_files(lib, paths, copy, write, autot, logpath, art, threaded, incremental = incremental, ignore = ignore, resolve_duplicate_func = resolve_duplicate, + per_disc_numbering = per_disc_numbering, ) - + finally: # If we were logging, close the file. if logfile: - print >>logfile, '' + print('', file=logfile) logfile.close() # Emit event. @@ -696,10 +726,6 @@ import_cmd.parser.add_option('-p', '--resume', action='store_true', default=None, help="resume importing if interrupted") import_cmd.parser.add_option('-P', '--noresume', action='store_false', dest='resume', help="do not try to resume importing") -import_cmd.parser.add_option('-r', '--art', action='store_true', - default=None, help="try to download album art") -import_cmd.parser.add_option('-R', '--noart', action='store_false', - dest='art', help="don't album art (opposite of -r)") import_cmd.parser.add_option('-q', '--quiet', action='store_true', dest='quiet', help="never prompt for input: skip albums instead") import_cmd.parser.add_option('-l', '--log', dest='logpath', @@ -712,19 +738,20 @@ import_cmd.parser.add_option('-L', '--library', dest='library', action='store_true', help='retag items matching a query') import_cmd.parser.add_option('-i', '--incremental', dest='incremental', action='store_true', help='skip already-imported directories') +import_cmd.parser.add_option('-I', '--noincremental', 
dest='incremental', + action='store_false', help='do not skip already-imported directories') def import_func(lib, config, opts, args): copy = opts.copy if opts.copy is not None else \ ui.config_val(config, 'beets', 'import_copy', DEFAULT_IMPORT_COPY, bool) + move = ui.config_val(config, 'beets', 'import_move', + DEFAULT_IMPORT_MOVE, bool) write = opts.write if opts.write is not None else \ ui.config_val(config, 'beets', 'import_write', DEFAULT_IMPORT_WRITE, bool) delete = ui.config_val(config, 'beets', 'import_delete', DEFAULT_IMPORT_DELETE, bool) autot = opts.autotag if opts.autotag is not None else DEFAULT_IMPORT_AUTOT - art = opts.art if opts.art is not None else \ - ui.config_val(config, 'beets', 'import_art', - DEFAULT_IMPORT_ART, bool) threaded = ui.config_val(config, 'beets', 'threaded', DEFAULT_THREADED, bool) color = ui.config_val(config, 'beets', 'color', DEFAULT_COLOR, bool) @@ -741,6 +768,8 @@ def import_func(lib, config, opts, args): ui.config_val(config, 'beets', 'import_incremental', DEFAULT_IMPORT_INCREMENTAL, bool) ignore = ui.config_val(config, 'beets', 'ignore', DEFAULT_IGNORE, list) + per_disc_numbering = ui.config_val(config, 'beets', 'per_disc_numbering', + DEFAULT_PER_DISC_NUMBERING, bool) # Resume has three options: yes, no, and "ask" (None). resume = opts.resume if opts.resume is not None else \ @@ -753,6 +782,11 @@ def import_func(lib, config, opts, args): else: resume = None + # Special case: --copy flag suppresses import_move (which would + # otherwise take precedence). 
+ if opts.copy: + move = False + if quiet_fallback_str == 'asis': quiet_fallback = importer.action.ASIS else: @@ -765,26 +799,23 @@ def import_func(lib, config, opts, args): query = None paths = args - import_files(lib, paths, copy, write, autot, logpath, art, threaded, + import_files(lib, paths, copy, move, write, autot, logpath, threaded, color, delete, quiet, resume, quiet_fallback, singletons, - timid, query, incremental, ignore) + timid, query, incremental, ignore, per_disc_numbering) import_cmd.func = import_func default_commands.append(import_cmd) # list: Query and show library contents. +DEFAULT_LIST_FORMAT_ITEM = '$artist - $album - $title' +DEFAULT_LIST_FORMAT_ALBUM = '$albumartist - $album' + def list_items(lib, query, album, path, fmt): """Print out items in lib matching query. If album, then search for albums instead of single items. If path, print the matched objects' paths instead of human-readable information about them. """ - if fmt is None: - # If no specific template is supplied, use a default. 
- if album: - fmt = u'$albumartist - $album' - else: - fmt = u'$artist - $album - $title' template = Template(fmt) if album: @@ -792,13 +823,13 @@ def list_items(lib, query, album, path, fmt): if path: print_(album.item_dir()) elif fmt is not None: - print_(template.substitute(album._record)) + print_(album.evaluate_template(template)) else: for item in lib.items(query): if path: print_(item.path) elif fmt is not None: - print_(template.substitute(item.record)) + print_(item.evaluate_template(template, lib)) list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) list_cmd.parser.add_option('-a', '--album', action='store_true', @@ -808,7 +839,16 @@ list_cmd.parser.add_option('-p', '--path', action='store_true', list_cmd.parser.add_option('-f', '--format', action='store', help='print with custom format', default=None) def list_func(lib, config, opts, args): - list_items(lib, decargs(args), opts.album, opts.path, opts.format) + fmt = opts.format + if not fmt: + # If no format is specified, fall back to a default. + if opts.album: + fmt = ui.config_val(config, 'beets', 'list_format_album', + DEFAULT_LIST_FORMAT_ALBUM) + else: + fmt = ui.config_val(config, 'beets', 'list_format_item', + DEFAULT_LIST_FORMAT_ITEM) + list_items(lib, decargs(args), opts.album, opts.path, fmt) list_cmd.func = list_func default_commands.append(list_cmd) @@ -819,89 +859,89 @@ def update_items(lib, query, album, move, color, pretend): """For all the items matched by the query, update the library to reflect the item's embedded tags. """ - items, _ = _do_query(lib, query, album) + with lib.transaction(): + items, _ = _do_query(lib, query, album) - # Walk through the items and pick up their changes. - affected_albums = set() - for item in items: - # Item deleted? 
- if not os.path.exists(syspath(item.path)): - print_(u'X %s - %s' % (item.artist, item.title)) - if not pretend: - lib.remove(item, True) - affected_albums.add(item.album_id) - continue - - # Did the item change since last checked? - if item.current_mtime() <= item.mtime: - log.debug(u'skipping %s because mtime is up to date (%i)' % - (displayable_path(item.path), item.mtime)) - continue - - # Read new data. - old_data = dict(item.record) - item.read() - - # Special-case album artist when it matches track artist. (Hacky - # but necessary for preserving album-level metadata for non- - # autotagged imports.) - if not item.albumartist and \ - old_data['albumartist'] == old_data['artist'] == item.artist: - item.albumartist = old_data['albumartist'] - item.dirty['albumartist'] = False - - # Get and save metadata changes. - changes = {} - for key in library.ITEM_KEYS_META: - if item.dirty[key]: - changes[key] = old_data[key], getattr(item, key) - if changes: - # Something changed. - print_(u'* %s - %s' % (item.artist, item.title)) - for key, (oldval, newval) in changes.iteritems(): - _showdiff(key, oldval, newval, color) - - # If we're just pretending, then don't move or save. - if pretend: + # Walk through the items and pick up their changes. + affected_albums = set() + for item in items: + # Item deleted? + if not os.path.exists(syspath(item.path)): + print_(u'X %s - %s' % (item.artist, item.title)) + if not pretend: + lib.remove(item, True) + affected_albums.add(item.album_id) continue - # Move the item if it's in the library. - if move and lib.directory in ancestry(item.path): - lib.move(item) + # Did the item change since last checked? + if item.current_mtime() <= item.mtime: + log.debug(u'skipping %s because mtime is up to date (%i)' % + (displayable_path(item.path), item.mtime)) + continue - lib.store(item) - affected_albums.add(item.album_id) - elif not pretend: - # The file's mtime was different, but there were no changes - # to the metadata. 
Store the new mtime, which is set in the - # call to read(), so we don't check this again in the - # future. - lib.store(item) + # Read new data. + old_data = dict(item.record) + item.read() - # Skip album changes while pretending. - if pretend: - return + # Special-case album artist when it matches track artist. (Hacky + # but necessary for preserving album-level metadata for non- + # autotagged imports.) + if not item.albumartist and \ + old_data['albumartist'] == old_data['artist'] == \ + item.artist: + item.albumartist = old_data['albumartist'] + item.dirty['albumartist'] = False - # Modify affected albums to reflect changes in their items. - for album_id in affected_albums: - if album_id is None: # Singletons. - continue - album = lib.get_album(album_id) - if not album: # Empty albums have already been removed. - log.debug('emptied album %i' % album_id) - continue - al_items = list(album.items()) + # Get and save metadata changes. + changes = {} + for key in library.ITEM_KEYS_META: + if item.dirty[key]: + changes[key] = old_data[key], getattr(item, key) + if changes: + # Something changed. + print_(u'* %s - %s' % (item.artist, item.title)) + for key, (oldval, newval) in changes.iteritems(): + _showdiff(key, oldval, newval, color) - # Update album structure to reflect an item in it. - for key in library.ALBUM_KEYS_ITEM: - setattr(album, key, getattr(al_items[0], key)) + # If we're just pretending, then don't move or save. + if pretend: + continue - # Move album art (and any inconsistent items). - if move and lib.directory in ancestry(al_items[0].path): - log.debug('moving album %i' % album_id) - album.move() + # Move the item if it's in the library. + if move and lib.directory in ancestry(item.path): + lib.move(item) - lib.save() + lib.store(item) + affected_albums.add(item.album_id) + elif not pretend: + # The file's mtime was different, but there were no changes + # to the metadata. 
Store the new mtime, which is set in the + # call to read(), so we don't check this again in the + # future. + lib.store(item) + + # Skip album changes while pretending. + if pretend: + return + + # Modify affected albums to reflect changes in their items. + for album_id in affected_albums: + if album_id is None: # Singletons. + continue + album = lib.get_album(album_id) + if not album: # Empty albums have already been removed. + log.debug('emptied album %i' % album_id) + continue + al_items = list(album.items()) + + # Update album structure to reflect an item in it. + for key in library.ALBUM_KEYS_ITEM: + setattr(album, key, getattr(al_items[0], key)) + + # Move album art (and any inconsistent items). + if move and lib.directory in ancestry(al_items[0].path): + log.debug('moving album %i' % album_id) + album.move() update_cmd = ui.Subcommand('update', help='update the library', aliases=('upd','up',)) @@ -942,14 +982,13 @@ def remove_items(lib, query, album, delete=False): return # Remove (and possibly delete) items. - if album: - for al in albums: - al.remove(delete) - else: - for item in items: - lib.remove(item, delete) - - lib.save() + with lib.transaction(): + if album: + for al in albums: + al.remove(delete) + else: + for item in items: + lib.remove(item, delete) remove_cmd = ui.Subcommand('remove', help='remove matching items from the library', aliases=('rm',)) @@ -1007,16 +1046,18 @@ default_commands.append(stats_cmd) # version: Show current beets version. def show_version(lib, config, opts, args): - print 'beets version %s' % lib.beets.__version__ + + print_('beets version %s' % lib.beets.__version__) + # Show plugins. 
names = [] for plugin in plugins.find_plugins(): modname = plugin.__module__ names.append(modname.split('.')[-1]) if names: - print 'plugins:', ', '.join(names) + print_('plugins:', ', '.join(names)) else: - print 'no plugins loaded' + print_('no plugins loaded') version_cmd = ui.Subcommand('version', help='output version information') version_cmd.func = show_version @@ -1061,23 +1102,23 @@ def modify_items(lib, mods, query, write, move, album, color, confirm): return # Apply changes to database. - for obj in objs: - for field, value in fsets.iteritems(): - setattr(obj, field, value) + with lib.transaction(): + for obj in objs: + for field, value in fsets.iteritems(): + setattr(obj, field, value) - if move: - cur_path = obj.item_dir() if album else obj.path - if lib.directory in ancestry(cur_path): # In library? - log.debug('moving object %s' % cur_path) - if album: - obj.move() - else: - lib.move(obj) + if move: + cur_path = obj.item_dir() if album else obj.path + if lib.directory in ancestry(cur_path): # In library? + log.debug('moving object %s' % cur_path) + if album: + obj.move() + else: + lib.move(obj) - # When modifying items, we have to store them to the database. - if not album: - lib.store(obj) - lib.save() + # When modifying items, we have to store them to the database. + if not album: + lib.store(obj) # Apply tags if requested. if write: @@ -1136,7 +1177,6 @@ def move_items(lib, dest, query, copy, album): else: lib.move(obj, copy, basedir=dest) lib.store(obj) - lib.save() move_cmd = ui.Subcommand('move', help='move or copy items', aliases=('mv',)) diff --git a/lib/beets/util/__init__.py b/lib/beets/util/__init__.py index b0ec38ba..380bfaf8 100644 --- a/lib/beets/util/__init__.py +++ b/lib/beets/util/__init__.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2011, Adrian Sampson. +# Copyright 2012, Adrian Sampson. 
# # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -8,20 +8,102 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. """Miscellaneous utility functions.""" +from __future__ import division + import os import sys import re import shutil import fnmatch from collections import defaultdict +import traceback MAX_FILENAME_LENGTH = 200 +class HumanReadableException(Exception): + """An Exception that can include a human-readable error message to + be logged without a traceback. Can preserve a traceback for + debugging purposes as well. + + Has at least two fields: `reason`, the underlying exception or a + string describing the problem; and `verb`, the action being + performed during the error. + + If `tb` is provided, it is a string containing a traceback for the + associated exception. (Note that this is not necessary in Python 3.x + and should be removed when we make the transition.) + """ + error_kind = 'Error' # Human-readable description of error type. + + def __init__(self, reason, verb, tb=None): + self.reason = reason + self.verb = verb + self.tb = tb + super(HumanReadableException, self).__init__(self.get_message()) + + def _gerund(self): + """Generate a (likely) gerund form of the English verb. 
+ """ + if ' ' in self.verb: + return self.verb + gerund = self.verb[:-1] if self.verb.endswith('e') else self.verb + gerund += 'ing' + return gerund + + def _reasonstr(self): + """Get the reason as a string.""" + if isinstance(self.reason, basestring): + return self.reason + elif hasattr(self.reason, 'strerror'): # i.e., EnvironmentError + return self.reason.strerror + else: + return u'"{0}"'.format(self.reason) + + def get_message(self): + """Create the human-readable description of the error, sans + introduction. + """ + raise NotImplementedError + + def log(self, logger): + """Log to the provided `logger` a human-readable message as an + error and a verbose traceback as a debug message. + """ + if self.tb: + logger.debug(self.tb) + logger.error(u'{0}: {1}'.format(self.error_kind, self.args[0])) + +class FilesystemError(HumanReadableException): + """An error that occurred while performing a filesystem manipulation + via a function in this module. The `paths` field is a sequence of + pathnames involved in the operation. + """ + def __init__(self, reason, verb, paths, tb=None): + self.paths = paths + super(FilesystemError, self).__init__(reason, verb, tb) + + def get_message(self): + # Use a nicer English phrasing for some specific verbs. + if self.verb in ('move', 'copy', 'rename'): + clause = 'while {0} {1} to {2}'.format( + self._gerund(), repr(self.paths[0]), repr(self.paths[1]) + ) + elif self.verb in ('delete',): + clause = 'while {0} {1}'.format( + self._gerund(), repr(self.paths[0]) + ) + else: + clause = 'during {0} of paths {1}'.format( + self.verb, u', '.join(repr(p) for p in self.paths) + ) + + return u'{0} {1}'.format(self._reasonstr(), clause) + def normpath(path): """Provide the canonical form of the path suitable for storing in the database. 
@@ -39,11 +121,11 @@ def ancestry(path, pathmod=None): last_path = None while path: path = pathmod.dirname(path) - + if path == last_path: break last_path = path - + if path: # don't yield '' out.insert(0, path) return out @@ -59,7 +141,9 @@ def sorted_walk(path, ignore=()): # Get all the directories and files at this level. dirs = [] files = [] - for base in os.listdir(path): + for base in os.listdir(syspath(path)): + base = bytestring_path(base) + # Skip ignored filenames. skip = False for pat in ignore: @@ -84,7 +168,7 @@ def sorted_walk(path, ignore=()): # Recurse into directories. for base in dirs: cur = os.path.join(path, base) - # yield from _sorted_walk(cur) + # yield from sorted_walk(...) for res in sorted_walk(cur, ignore): yield res @@ -149,13 +233,13 @@ def components(path, pathmod=None): comp = pathmod.basename(anc) if comp: comps.append(comp) - else: # root + else: # root comps.append(anc) - + last = pathmod.basename(path) if last: comps.append(last) - + return comps def bytestring_path(path): @@ -168,6 +252,13 @@ def bytestring_path(path): # Try to encode with default encodings, but fall back to UTF8. encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + if encoding == 'mbcs': + # On Windows, a broken encoding known to Python as "MBCS" is + # used for the filesystem. However, we only use the Unicode API + # for Windows paths, so the encoding is actually immaterial so + # we can avoid dealing with this nastiness. We arbitrarily + # choose UTF-8. + encoding = 'utf8' try: return path.encode(encoding) except (UnicodeError, LookupError): @@ -202,12 +293,16 @@ def syspath(path, pathmod=None): return path if not isinstance(path, unicode): - # Try to decode with default encodings, but fall back to UTF8. - encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + # Beets currently represents Windows paths internally with UTF-8 + # arbitrarily. But earlier versions used MBCS because it is + # reported as the FS encoding by Windows. 
Try both. try: - path = path.decode(encoding, 'replace') + path = path.decode('utf8') except UnicodeError: - path = path.decode('utf8', 'replace') + # The encoding should always be MBCS, Windows' broken + # Unicode representation. + encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + path = path.decode(encoding, 'replace') # Add the magic prefix if it isn't already there if not path.startswith(u'\\\\?\\'): @@ -219,42 +314,63 @@ def samefile(p1, p2): """Safer equality for paths.""" return shutil._samefile(syspath(p1), syspath(p2)) -def soft_remove(path): - """Remove the file if it exists.""" +def remove(path, soft=True): + """Remove the file. If `soft`, then no error will be raised if the + file does not exist. + """ path = syspath(path) - if os.path.exists(path): + if soft and not os.path.exists(path): + return + try: os.remove(path) + except (OSError, IOError) as exc: + raise FilesystemError(exc, 'delete', (path,), traceback.format_exc()) -def _assert_not_exists(path, pathmod=None): - """Raises an OSError if the path exists.""" - pathmod = pathmod or os.path - if pathmod.exists(path): - raise OSError('file exists: %s' % path) - -def copy(path, dest, replace=False, pathmod=None): - """Copy a plain file. Permissions are not copied. If dest already - exists, raises an OSError unless replace is True. Has no effect if - path is the same as dest. Paths are translated to system paths - before the syscall. +def copy(path, dest, replace=False, pathmod=os.path): + """Copy a plain file. Permissions are not copied. If `dest` already + exists, raises a FilesystemError unless `replace` is True. Has no + effect if `path` is the same as `dest`. Paths are translated to + system paths before the syscall. 
""" if samefile(path, dest): return path = syspath(path) dest = syspath(dest) - _assert_not_exists(dest, pathmod) - return shutil.copyfile(path, dest) + if not replace and pathmod.exists(dest): + raise FilesystemError('file exists', 'copy', (path, dest)) + try: + shutil.copyfile(path, dest) + except (OSError, IOError) as exc: + raise FilesystemError(exc, 'copy', (path, dest), + traceback.format_exc()) -def move(path, dest, replace=False, pathmod=None): - """Rename a file. dest may not be a directory. If dest already - exists, raises an OSError unless replace is True. Hos no effect if - path is the same as dest. Paths are translated to system paths. +def move(path, dest, replace=False, pathmod=os.path): + """Rename a file. `dest` may not be a directory. If `dest` already + exists, raises an OSError unless `replace` is True. Has no effect if + `path` is the same as `dest`. If the paths are on different + filesystems (or the rename otherwise fails), a copy is attempted + instead, in which case metadata will *not* be preserved. Paths are + translated to system paths. """ if samefile(path, dest): return path = syspath(path) dest = syspath(dest) - _assert_not_exists(dest, pathmod) - return shutil.move(path, dest) + if pathmod.exists(dest): + raise FilesystemError('file exists', 'rename', (path, dest), + traceback.format_exc()) + + # First, try renaming the file. + try: + os.rename(path, dest) + except OSError: + # Otherwise, copy and delete the original. + try: + shutil.copyfile(path, dest) + os.remove(path) + except (OSError, IOError) as exc: + raise FilesystemError(exc, 'move', (path, dest), + traceback.format_exc()) def unique_path(path): """Returns a version of ``path`` that does not exist on the @@ -277,33 +393,33 @@ def unique_path(path): if not os.path.exists(new_path): return new_path -# Note: POSIX actually supports \ and : -- I just think they're -# a pain. And ? has caused problems for some. 
+# Note: The Windows "reserved characters" are, of course, allowed on +# Unix. They are forbidden here because they cause problems on Samba +# shares, which are sufficiently common as to cause frequent problems. +# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx CHAR_REPLACE = [ - (re.compile(r'[\\/\?"]|^\.'), '_'), - (re.compile(r':'), '-'), -] -CHAR_REPLACE_WINDOWS = [ - (re.compile(r'["\*<>\|]|^\.|\.$|\s+$'), '_'), + (re.compile(ur'[\\/]'), u'_'), # / and \ -- forbidden everywhere. + (re.compile(ur'^\.'), u'_'), # Leading dot (hidden files on Unix). + (re.compile(ur'[\x00-\x1f]'), u''), # Control characters. + (re.compile(ur'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters". + (re.compile(ur'\.$'), u'_'), # Trailing dots. + (re.compile(ur'\s+$'), u''), # Trailing whitespace. ] def sanitize_path(path, pathmod=None, replacements=None): - """Takes a path and makes sure that it is legal. Returns a new path. - Only works with fragments; won't work reliably on Windows when a - path begins with a drive letter. Path separators (including altsep!) - should already be cleaned from the path components. If replacements - is specified, it is used *instead* of the default set of - replacements for the platform; it must be a list of (compiled regex, - replacement string) pairs. + """Takes a path (as a Unicode string) and makes sure that it is + legal. Returns a new path. Only works with fragments; won't work + reliably on Windows when a path begins with a drive letter. Path + separators (including altsep!) should already be cleaned from the + path components. If replacements is specified, it is used *instead* + of the default set of replacements for the platform; it must be a + list of (compiled regex, replacement string) pairs. """ pathmod = pathmod or os.path - windows = pathmod.__name__ == 'ntpath' # Choose the appropriate replacements. 
if not replacements: replacements = list(CHAR_REPLACE) - if windows: - replacements += CHAR_REPLACE_WINDOWS - + comps = components(path, pathmod) if not comps: return '' @@ -311,10 +427,10 @@ def sanitize_path(path, pathmod=None, replacements=None): # Replace special characters. for regex, repl in replacements: comp = regex.sub(repl, comp) - + # Truncate each component. comp = comp[:MAX_FILENAME_LENGTH] - + comps[i] = comp return pathmod.join(*comps) @@ -336,10 +452,10 @@ def sanitize_for_path(value, pathmod, key=None): value = u'%02i' % (value or 0) elif key == 'bitrate': # Bitrate gets formatted as kbps. - value = u'%ikbps' % ((value or 0) / 1000) + value = u'%ikbps' % ((value or 0) // 1000) elif key == 'samplerate': # Sample rate formatted as kHz. - value = u'%ikHz' % ((value or 0) / 1000) + value = u'%ikHz' % ((value or 0) // 1000) else: value = unicode(value) return value @@ -360,7 +476,7 @@ def levenshtein(s1, s2): return levenshtein(s2, s1) if not s1: return len(s2) - + previous_row = xrange(len(s2) + 1) for i, c1 in enumerate(s1): current_row = [i + 1] @@ -370,7 +486,7 @@ def levenshtein(s1, s2): substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row - + return previous_row[-1] def plurality(objs): diff --git a/lib/beets/util/bluelet.py b/lib/beets/util/bluelet.py new file mode 100644 index 00000000..aee63116 --- /dev/null +++ b/lib/beets/util/bluelet.py @@ -0,0 +1,562 @@ +"""Extremely simple pure-Python implementation of coroutine-style +asynchronous socket I/O. Inspired by, but inferior to, Eventlet. +Bluelet can also be thought of as a less-terrible replacement for +asyncore. + +Bluelet: easy concurrency without all the messy parallelism. +""" +import socket +import select +import sys +import types +import errno +import traceback +import time +import collections + + +# A little bit of "six" (Python 2/3 compatibility): cope with PEP 3109 syntax +# changes. 
+ +PY3 = sys.version_info[0] == 3 +if PY3: + def _reraise(typ, exc, tb): + raise exc.with_traceback(tb) +else: + exec(""" +def _reraise(typ, exc, tb): + raise typ, exc, tb""") + + +# Basic events used for thread scheduling. + +class Event(object): + """Just a base class identifying Bluelet events. An event is an + object yielded from a Bluelet thread coroutine to suspend operation + and communicate with the scheduler. + """ + pass + +class WaitableEvent(Event): + """A waitable event is one encapsulating an action that can be + waited for using a select() call. That is, it's an event with an + associated file descriptor. + """ + def waitables(self): + """Return "waitable" objects to pass to select(). Should return + three iterables for input readiness, output readiness, and + exceptional conditions (i.e., the three lists passed to + select()). + """ + return (), (), () + + def fire(self): + """Called when an assoicated file descriptor becomes ready + (i.e., is returned from a select() call). + """ + pass + +class ValueEvent(Event): + """An event that does nothing but return a fixed value.""" + def __init__(self, value): + self.value = value + +class ExceptionEvent(Event): + """Raise an exception at the yield point. Used internally.""" + def __init__(self, exc_info): + self.exc_info = exc_info + +class SpawnEvent(Event): + """Add a new coroutine thread to the scheduler.""" + def __init__(self, coro): + self.spawned = coro + +class JoinEvent(Event): + """Suspend the thread until the specified child thread has + completed. + """ + def __init__(self, child): + self.child = child + +class DelegationEvent(Event): + """Suspend execution of the current thread, start a new thread and, + once the child thread finished, return control to the parent + thread. + """ + def __init__(self, coro): + self.spawned = coro + +class ReturnEvent(Event): + """Return a value the current thread's delegator at the point of + delegation. Ends the current (delegate) thread. 
+ """ + def __init__(self, value): + self.value = value + +class SleepEvent(WaitableEvent): + """Suspend the thread for a given duration. + """ + def __init__(self, duration): + self.wakeup_time = time.time() + duration + + def time_left(self): + return max(self.wakeup_time - time.time(), 0.0) + +class ReadEvent(WaitableEvent): + """Reads from a file-like object.""" + def __init__(self, fd, bufsize): + self.fd = fd + self.bufsize = bufsize + + def waitables(self): + return (self.fd,), (), () + + def fire(self): + return self.fd.read(self.bufsize) + +class WriteEvent(WaitableEvent): + """Writes to a file-like object.""" + def __init__(self, fd, data): + self.fd = fd + self.data = data + + def waitable(self): + return (), (self.fd,), () + + def fire(self): + self.fd.write(self.data) + + +# Core logic for executing and scheduling threads. + +def _event_select(events): + """Perform a select() over all the Events provided, returning the + ones ready to be fired. Only WaitableEvents (including SleepEvents) + matter here; all other events are ignored (and thus postponed). + """ + # Gather waitables and wakeup times. + waitable_to_event = {} + rlist, wlist, xlist = [], [], [] + earliest_wakeup = None + for event in events: + if isinstance(event, SleepEvent): + if not earliest_wakeup: + earliest_wakeup = event.wakeup_time + else: + earliest_wakeup = min(earliest_wakeup, event.wakeup_time) + elif isinstance(event, WaitableEvent): + r, w, x = event.waitables() + rlist += r + wlist += w + xlist += x + for waitable in r: + waitable_to_event[('r', waitable)] = event + for waitable in w: + waitable_to_event[('w', waitable)] = event + for waitable in x: + waitable_to_event[('x', waitable)] = event + + # If we have a any sleeping threads, determine how long to sleep. + if earliest_wakeup: + timeout = max(earliest_wakeup - time.time(), 0.0) + else: + timeout = None + + # Perform select() if we have any waitables. 
+ if rlist or wlist or xlist: + rready, wready, xready = select.select(rlist, wlist, xlist, timeout) + else: + rready, wready, xready = (), (), () + if timeout: + time.sleep(timeout) + + # Gather ready events corresponding to the ready waitables. + ready_events = set() + for ready in rready: + ready_events.add(waitable_to_event[('r', ready)]) + for ready in wready: + ready_events.add(waitable_to_event[('w', ready)]) + for ready in xready: + ready_events.add(waitable_to_event[('x', ready)]) + + # Gather any finished sleeps. + for event in events: + if isinstance(event, SleepEvent) and event.time_left() == 0.0: + ready_events.add(event) + + return ready_events + +class ThreadException(Exception): + def __init__(self, coro, exc_info): + self.coro = coro + self.exc_info = exc_info + def reraise(self): + _reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) + +SUSPENDED = Event() # Special sentinel placeholder for suspended threads. + +def run(root_coro): + """Schedules a coroutine, running it to completion. This + encapsulates the Bluelet scheduler, which the root coroutine can + add to by spawning new coroutines. + """ + # The "threads" dictionary keeps track of all the currently- + # executing and suspended coroutines. It maps coroutines to their + # currently "blocking" event. The event value may be SUSPENDED if + # the coroutine is waiting on some other condition: namely, a + # delegated coroutine or a joined coroutine. In this case, the + # coroutine should *also* appear as a value in one of the below + # dictionaries `delegators` or `joiners`. + threads = {root_coro: ValueEvent(None)} + + # Maps child coroutines to delegating parents. + delegators = {} + + # Maps child coroutines to joining (exit-waiting) parents. + joiners = collections.defaultdict(list) + + def complete_thread(coro, return_value): + """Remove a coroutine from the scheduling pool, awaking + delegators and joiners as necessary and returning the specified + value to any delegating parent. 
+ """ + del threads[coro] + + # Resume delegator. + if coro in delegators: + threads[delegators[coro]] = ValueEvent(return_value) + del delegators[coro] + + # Resume joiners. + if coro in joiners: + for parent in joiners[coro]: + threads[parent] = ValueEvent(None) + del joiners[coro] + + def advance_thread(coro, value, is_exc=False): + """After an event is fired, run a given coroutine associated with + it in the threads dict until it yields again. If the coroutine + exits, then the thread is removed from the pool. If the coroutine + raises an exception, it is reraised in a ThreadException. If + is_exc is True, then the value must be an exc_info tuple and the + exception is thrown into the coroutine. + """ + try: + if is_exc: + next_event = coro.throw(*value) + else: + next_event = coro.send(value) + except StopIteration: + # Thread is done. + complete_thread(coro, None) + except: + # Thread raised some other exception. + del threads[coro] + raise ThreadException(coro, sys.exc_info()) + else: + if isinstance(next_event, types.GeneratorType): + # Automatically invoke sub-coroutines. (Shorthand for + # explicit bluelet.call().) + next_event = DelegationEvent(next_event) + threads[coro] = next_event + + # Continue advancing threads until root thread exits. + exit_te = None + while threads: + try: + # Look for events that can be run immediately. Continue + # running immediate events until nothing is ready. + while True: + have_ready = False + for coro, event in list(threads.items()): + if isinstance(event, SpawnEvent): + threads[event.spawned] = ValueEvent(None) # Spawn. + advance_thread(coro, None) + have_ready = True + elif isinstance(event, ValueEvent): + advance_thread(coro, event.value) + have_ready = True + elif isinstance(event, ExceptionEvent): + advance_thread(coro, event.exc_info, True) + have_ready = True + elif isinstance(event, DelegationEvent): + threads[coro] = SUSPENDED # Suspend. + threads[event.spawned] = ValueEvent(None) # Spawn. 
+ delegators[event.spawned] = coro + have_ready = True + elif isinstance(event, ReturnEvent): + # Thread is done. + complete_thread(coro, event.value) + have_ready = True + elif isinstance(event, JoinEvent): + threads[coro] = SUSPENDED # Suspend. + joiners[event.child].append(coro) + have_ready = True + + # Only start the select when nothing else is ready. + if not have_ready: + break + + # Wait and fire. + event2coro = dict((v,k) for k,v in threads.items()) + for event in _event_select(threads.values()): + # Run the IO operation, but catch socket errors. + try: + value = event.fire() + except socket.error as exc: + if isinstance(exc.args, tuple) and \ + exc.args[0] == errno.EPIPE: + # Broken pipe. Remote host disconnected. + pass + else: + traceback.print_exc() + # Abort the coroutine. + threads[event2coro[event]] = ReturnEvent(None) + else: + advance_thread(event2coro[event], value) + + except ThreadException as te: + # Exception raised from inside a thread. + event = ExceptionEvent(te.exc_info) + if te.coro in delegators: + # The thread is a delegate. Raise exception in its + # delegator. + threads[delegators[te.coro]] = event + del delegators[te.coro] + else: + # The thread is root-level. Raise in client code. + exit_te = te + break + + except: + # For instance, KeyboardInterrupt during select(). Raise + # into root thread and terminate others. + threads = {root_coro: ExceptionEvent(sys.exc_info())} + + # If any threads still remain, kill them. + for coro in threads: + coro.close() + + # If we're exiting with an exception, raise it in the client. + if exit_te: + exit_te.reraise() + + +# Sockets and their associated events. + +class Listener(object): + """A socket wrapper object for listening sockets. + """ + def __init__(self, host, port): + """Create a listening socket on the given hostname and port. 
+ """ + self.host = host + self.port = port + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.sock.bind((host, port)) + self.sock.listen(5) + + def accept(self): + """An event that waits for a connection on the listening socket. + When a connection is made, the event returns a Connection + object. + """ + return AcceptEvent(self) + + def close(self): + """Immediately close the listening socket. (Not an event.) + """ + self.sock.close() + +class Connection(object): + """A socket wrapper object for connected sockets. + """ + def __init__(self, sock, addr): + self.sock = sock + self.addr = addr + self._buf = b'' + + def close(self): + """Close the connection.""" + self.sock.close() + + def recv(self, size): + """Read at most size bytes of data from the socket.""" + if self._buf: + # We already have data read previously. + out = self._buf[:size] + self._buf = self._buf[size:] + return ValueEvent(out) + else: + return ReceiveEvent(self, size) + + def send(self, data): + """Sends data on the socket, returning the number of bytes + successfully sent. + """ + return SendEvent(self, data) + + def sendall(self, data): + """Send all of data on the socket.""" + return SendEvent(self, data, True) + + def readline(self, terminator=b"\n", bufsize=1024): + """Reads a line (delimited by terminator) from the socket.""" + while True: + if terminator in self._buf: + line, self._buf = self._buf.split(terminator, 1) + line += terminator + yield ReturnEvent(line) + break + data = yield ReceiveEvent(self, bufsize) + if data: + self._buf += data + else: + line = self._buf + self._buf = b'' + yield ReturnEvent(line) + break + +class AcceptEvent(WaitableEvent): + """An event for Listener objects (listening sockets) that suspends + execution until the socket gets a connection. 
+ """ + def __init__(self, listener): + self.listener = listener + + def waitables(self): + return (self.listener.sock,), (), () + + def fire(self): + sock, addr = self.listener.sock.accept() + return Connection(sock, addr) + +class ReceiveEvent(WaitableEvent): + """An event for Connection objects (connected sockets) for + asynchronously reading data. + """ + def __init__(self, conn, bufsize): + self.conn = conn + self.bufsize = bufsize + + def waitables(self): + return (self.conn.sock,), (), () + + def fire(self): + return self.conn.sock.recv(self.bufsize) + +class SendEvent(WaitableEvent): + """An event for Connection objects (connected sockets) for + asynchronously writing data. + """ + def __init__(self, conn, data, sendall=False): + self.conn = conn + self.data = data + self.sendall = sendall + + def waitables(self): + return (), (self.conn.sock,), () + + def fire(self): + if self.sendall: + return self.conn.sock.sendall(self.data) + else: + return self.conn.sock.send(self.data) + + +# Public interface for threads; each returns an event object that +# can immediately be "yield"ed. + +def null(): + """Event: yield to the scheduler without doing anything special. + """ + return ValueEvent(None) + +def spawn(coro): + """Event: add another coroutine to the scheduler. Both the parent + and child coroutines run concurrently. + """ + if not isinstance(coro, types.GeneratorType): + raise ValueError('%s is not a coroutine' % str(coro)) + return SpawnEvent(coro) + +def call(coro): + """Event: delegate to another coroutine. The current coroutine + is resumed once the sub-coroutine finishes. If the sub-coroutine + returns a value using end(), then this event returns that value. + """ + if not isinstance(coro, types.GeneratorType): + raise ValueError('%s is not a coroutine' % str(coro)) + return DelegationEvent(coro) + +def end(value = None): + """Event: ends the coroutine and returns a value to its + delegator. 
+ """ + return ReturnEvent(value) + +def read(fd, bufsize = None): + """Event: read from a file descriptor asynchronously.""" + if bufsize is None: + # Read all. + def reader(): + buf = [] + while True: + data = yield read(fd, 1024) + if not data: + break + buf.append(data) + yield ReturnEvent(''.join(buf)) + return DelegationEvent(reader()) + + else: + return ReadEvent(fd, bufsize) + +def write(fd, data): + """Event: write to a file descriptor asynchronously.""" + return WriteEvent(fd, data) + +def connect(host, port): + """Event: connect to a network address and return a Connection + object for communicating on the socket. + """ + addr = (host, port) + sock = socket.create_connection(addr) + return ValueEvent(Connection(sock, addr)) + +def sleep(duration): + """Event: suspend the thread for ``duration`` seconds. + """ + return SleepEvent(duration) + +def join(coro): + """Suspend the thread until another, previously `spawn`ed thread + completes. + """ + return JoinEvent(coro) + + +# Convenience function for running socket servers. + +def server(host, port, func): + """A coroutine that runs a network server. Host and port specify the + listening address. func should be a coroutine that takes a single + parameter, a Connection object. The coroutine is invoked for every + incoming connection on the listening socket. 
+ """ + def handler(conn): + try: + yield func(conn) + finally: + conn.close() + + listener = Listener(host, port) + try: + while True: + conn = yield listener.accept() + yield spawn(handler(conn)) + except KeyboardInterrupt: + pass + finally: + listener.close() diff --git a/lib/beets/util/enumeration.py b/lib/beets/util/enumeration.py index 794a0624..f4968025 100644 --- a/lib/beets/util/enumeration.py +++ b/lib/beets/util/enumeration.py @@ -8,7 +8,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. @@ -35,7 +35,7 @@ how you would expect them to. 'west' >>> Direction.north < Direction.west True - + Enumerations are classes; their instances represent the possible values of the enumeration. Because Python classes must have names, you may provide a `name` parameter to `enum`; if you don't, a meaningless one @@ -45,31 +45,31 @@ import random class Enumeration(type): """A metaclass whose classes are enumerations. - + The `values` attribute of the class is used to populate the enumeration. Values may either be a list of enumerated names or a string containing a space-separated list of names. When the class is created, it is instantiated for each name value in `values`. Each such instance is the name of the enumerated item as the sole argument. - + The `Enumerated` class is a good choice for a superclass. """ - + def __init__(cls, name, bases, dic): super(Enumeration, cls).__init__(name, bases, dic) - + if 'values' not in dic: # Do nothing if no values are provided (i.e., with # Enumerated itself). return - + # May be called with a single string, in which case we split on # whitespace for convenience. 
values = dic['values'] if isinstance(values, basestring): values = values.split() - + # Create the Enumerated instances for each value. We have to use # super's __setattr__ here because we disallow setattr below. super(Enumeration, cls).__setattr__('_items_dict', {}) @@ -78,56 +78,56 @@ class Enumeration(type): item = cls(value, len(cls._items_list)) cls._items_dict[value] = item cls._items_list.append(item) - + def __getattr__(cls, key): try: return cls._items_dict[key] except KeyError: raise AttributeError("enumeration '" + cls.__name__ + "' has no item '" + key + "'") - + def __setattr__(cls, key, val): raise TypeError("enumerations do not support attribute assignment") - + def __getitem__(cls, key): if isinstance(key, int): return cls._items_list[key] else: return getattr(cls, key) - + def __len__(cls): return len(cls._items_list) - + def __iter__(cls): return iter(cls._items_list) - + def __nonzero__(cls): # Ensures that __len__ doesn't get called before __init__ by # pydoc. return True - + class Enumerated(object): """An item in an enumeration. - + Contains instance methods inherited by enumerated objects. The metaclass is preset to `Enumeration` for your convenience. - - Instance attributes: + + Instance attributes: name -- The name of the item. index -- The index of the item in its enumeration. - + >>> from enumeration import Enumerated >>> class Garment(Enumerated): ... values = 'hat glove belt poncho lederhosen suspenders' ... def wear(self): - ... print 'now wearing a ' + self.name + ... print('now wearing a ' + self.name) ... >>> Garment.poncho.wear() now wearing a poncho """ - + __metaclass__ = Enumeration - + def __init__(self, name, index): self.name = name self.index = index @@ -149,18 +149,18 @@ class Enumerated(object): def enum(*values, **kwargs): """Shorthand for creating a new Enumeration class. - + Call with enumeration values as a list, a space-delimited string, or just an argument list. 
To give the class a name, pass it as the `name` keyword argument. Otherwise, a name will be chosen for you. - + The following are all equivalent: - + enum('pinkie ring middle index thumb') enum('pinkie', 'ring', 'middle', 'index', 'thumb') enum(['pinkie', 'ring', 'middle', 'index', 'thumb']) """ - + if ('name' not in kwargs) or kwargs['name'] is None: # Create a probably-unique name. It doesn't really have to be # unique, but getting distinct names each time helps with @@ -168,11 +168,11 @@ def enum(*values, **kwargs): name = 'Enumeration' + hex(random.randint(0,0xfffffff))[2:].upper() else: name = kwargs['name'] - + if len(values) == 1: # If there's only one value, we have a couple of alternate calling # styles. if isinstance(values[0], basestring) or hasattr(values[0], '__iter__'): values = values[0] - + return type(name, (Enumerated,), {'values': values}) diff --git a/lib/beets/util/functemplate.py b/lib/beets/util/functemplate.py index 5d692179..94cdf6c1 100644 --- a/lib/beets/util/functemplate.py +++ b/lib/beets/util/functemplate.py @@ -8,7 +8,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. @@ -25,7 +25,12 @@ library: unknown symbols are left intact. This is sort of like a tiny, horrible degeneration of a real templating engine like Jinja2 or Mustache. """ +from __future__ import print_function + import re +import ast +import dis +import types SYMBOL_DELIM = u'$' FUNC_DELIM = u'%' @@ -34,6 +39,9 @@ GROUP_CLOSE = u'}' ARG_SEP = u',' ESCAPE_CHAR = u'$' +VARIABLE_PREFIX = '__var_' +FUNCTION_PREFIX = '__func_' + class Environment(object): """Contains the values and functions to be substituted into a template. 
@@ -42,6 +50,88 @@ class Environment(object): self.values = values self.functions = functions + +# Code generation helpers. + +def ex_lvalue(name): + """A variable store expression.""" + return ast.Name(name, ast.Store()) + +def ex_rvalue(name): + """A variable load expression.""" + return ast.Name(name, ast.Load()) + +def ex_literal(val): + """An int, float, long, bool, string, or None literal with the given + value. + """ + if val is None: + return ast.Name('None', ast.Load()) + elif isinstance(val, (int, float, long)): + return ast.Num(val) + elif isinstance(val, bool): + return ast.Name(str(val), ast.Load()) + elif isinstance(val, basestring): + return ast.Str(val) + raise TypeError('no literal for {0}'.format(type(val))) + +def ex_varassign(name, expr): + """Assign an expression into a single variable. The expression may + either be an `ast.expr` object or a value to be used as a literal. + """ + if not isinstance(expr, ast.expr): + expr = ex_literal(expr) + return ast.Assign([ex_lvalue(name)], expr) + +def ex_call(func, args): + """A function-call expression with only positional parameters. The + function may be an expression or the name of a function. Each + argument may be an expression or a value to be used as a literal. + """ + if isinstance(func, basestring): + func = ex_rvalue(func) + + args = list(args) + for i in range(len(args)): + if not isinstance(args[i], ast.expr): + args[i] = ex_literal(args[i]) + + return ast.Call(func, args, [], None, None) + +def compile_func(arg_names, statements, name='_the_func', debug=False): + """Compile a list of statements as the body of a function and return + the resulting Python function. If `debug`, then print out the + bytecode of the compiled function. 
+ """ + func_def = ast.FunctionDef( + name, + ast.arguments( + [ast.Name(n, ast.Param()) for n in arg_names], + None, None, + [ex_literal(None) for _ in arg_names], + ), + statements, + [], + ) + mod = ast.Module([func_def]) + ast.fix_missing_locations(mod) + + prog = compile(mod, '', 'exec') + + # Debug: show bytecode. + if debug: + dis.dis(prog) + for const in prog.co_consts: + if isinstance(const, types.CodeType): + dis.dis(const) + + the_locals = {} + exec prog in {}, the_locals + return the_locals[name] + + +# AST nodes for the template language. + class Symbol(object): """A variable-substitution symbol in a template.""" def __init__(self, ident, original): @@ -62,6 +152,11 @@ class Symbol(object): # Keep original text. return self.original + def translate(self): + """Compile the variable lookup.""" + expr = ex_rvalue(VARIABLE_PREFIX + self.ident.encode('utf8')) + return [expr], set([self.ident.encode('utf8')]), set() + class Call(object): """A function call in a template.""" def __init__(self, ident, args, original): @@ -81,7 +176,7 @@ class Call(object): arg_vals = [expr.evaluate(env) for expr in self.args] try: out = env.functions[self.ident](*arg_vals) - except Exception, exc: + except Exception as exc: # Function raised exception! Maybe inlining the name of # the exception will help debug. return u'<%s>' % unicode(exc) @@ -89,6 +184,36 @@ class Call(object): else: return self.original + def translate(self): + """Compile the function call.""" + varnames = set() + funcnames = set([self.ident.encode('utf8')]) + + arg_exprs = [] + for arg in self.args: + subexprs, subvars, subfuncs = arg.translate() + varnames.update(subvars) + funcnames.update(subfuncs) + + # Create a subexpression that joins the result components of + # the arguments. 
+ arg_exprs.append(ex_call( + ast.Attribute(ex_literal(u''), 'join', ast.Load()), + [ex_call( + 'map', + [ + ex_rvalue('unicode'), + ast.List(subexprs, ast.Load()), + ] + )], + )) + + subexpr_call = ex_call( + FUNCTION_PREFIX + self.ident.encode('utf8'), + arg_exprs + ) + return [subexpr_call], varnames, funcnames + class Expression(object): """Top-level template construct: contains a list of text blobs, Symbols, and Calls. @@ -111,6 +236,26 @@ class Expression(object): out.append(part.evaluate(env)) return u''.join(map(unicode, out)) + def translate(self): + """Compile the expression to a list of Python AST expressions, a + set of variable names used, and a set of function names. + """ + expressions = [] + varnames = set() + funcnames = set() + for part in self.parts: + if isinstance(part, basestring): + expressions.append(ex_literal(part)) + else: + e, v, f = part.translate() + expressions.extend(e) + varnames.update(v) + funcnames.update(f) + return expressions, varnames, funcnames + + +# Parser. + class ParseError(Exception): pass @@ -266,7 +411,7 @@ class Parser(object): # No function name. self.parts.append(FUNC_DELIM) return - + if self.pos >= len(self.string): # Identifier terminates string. self.parts.append(self.string[start_pos:self.pos]) @@ -304,7 +449,7 @@ class Parser(object): # Extract and advance past the parsed expression. expressions.append(Expression(subparser.parts)) - self.pos += subparser.pos + self.pos += subparser.pos if self.pos >= len(self.string) or \ self.string[self.pos] == GROUP_CLOSE: @@ -340,14 +485,74 @@ def _parse(template): parts.append(remainder) return Expression(parts) + +# External interface. + class Template(object): """A string template, including text, Symbols, and Calls. 
""" def __init__(self, template): self.expr = _parse(template) self.original = template + self.compiled = self.translate() + + def interpret(self, values={}, functions={}): + """Like `substitute`, but forces the interpreter (rather than + the compiled version) to be used. The interpreter includes + exception-handling code for missing variables and buggy template + functions but is much slower. + """ + return self.expr.evaluate(Environment(values, functions)) def substitute(self, values={}, functions={}): """Evaluate the template given the values and functions. """ - return self.expr.evaluate(Environment(values, functions)) + try: + res = self.compiled(values, functions) + except: # Handle any exceptions thrown by compiled version. + res = self.interpret(values, functions) + return res + + def translate(self): + """Compile the template to a Python function.""" + expressions, varnames, funcnames = self.expr.translate() + + argnames = [] + for varname in varnames: + argnames.append(VARIABLE_PREFIX.encode('utf8') + varname) + for funcname in funcnames: + argnames.append(FUNCTION_PREFIX.encode('utf8') + funcname) + + func = compile_func( + argnames, + [ast.Return(ast.List(expressions, ast.Load()))], + ) + + def wrapper_func(values={}, functions={}): + args = {} + for varname in varnames: + args[VARIABLE_PREFIX + varname] = values[varname] + for funcname in funcnames: + args[FUNCTION_PREFIX + funcname] = functions[funcname] + parts = func(**args) + return u''.join(parts) + + return wrapper_func + + +# Performance tests. 
+ +if __name__ == '__main__': + import timeit + _tmpl = Template(u'foo $bar %baz{foozle $bar barzle} $bar') + _vars = {'bar': 'qux'} + _funcs = {'baz': unicode.upper} + interp_time = timeit.timeit('_tmpl.interpret(_vars, _funcs)', + 'from __main__ import _tmpl, _vars, _funcs', + number=10000) + print(interp_time) + comp_time = timeit.timeit('_tmpl.substitute(_vars, _funcs)', + 'from __main__ import _tmpl, _vars, _funcs', + number=10000) + print(comp_time) + print('Speedup:', interp_time / comp_time) diff --git a/lib/beets/util/pipeline.py b/lib/beets/util/pipeline.py index 6adbf160..b81db3c7 100644 --- a/lib/beets/util/pipeline.py +++ b/lib/beets/util/pipeline.py @@ -8,7 +8,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. @@ -30,7 +30,8 @@ up a bottleneck stage by dividing its work among multiple threads. To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. """ -from __future__ import with_statement # for Python 2.5 +from __future__ import print_function + import Queue from threading import Thread, Lock import sys @@ -177,23 +178,23 @@ class FirstPipelineThread(PipelineThread): self.coro = coro self.out_queue = out_queue self.out_queue.acquire() - + self.abort_lock = Lock() self.abort_flag = False - + def run(self): try: while True: with self.abort_lock: if self.abort_flag: return - + # Get the value from the generator. try: msg = self.coro.next() except StopIteration: break - + # Send messages to the next stage. for msg in _allmsgs(msg): with self.abort_lock: @@ -207,7 +208,7 @@ class FirstPipelineThread(PipelineThread): # Generator finished; shut down the pipeline. 
self.out_queue.release() - + class MiddlePipelineThread(PipelineThread): """A thread running any stage in the pipeline except the first or last. @@ -223,7 +224,7 @@ class MiddlePipelineThread(PipelineThread): try: # Prime the coroutine. self.coro.next() - + while True: with self.abort_lock: if self.abort_flag: @@ -233,14 +234,14 @@ class MiddlePipelineThread(PipelineThread): msg = self.in_queue.get() if msg is POISON: break - + with self.abort_lock: if self.abort_flag: return # Invoke the current stage. out = self.coro.send(msg) - + # Send messages to next stage. for msg in _allmsgs(out): with self.abort_lock: @@ -251,7 +252,7 @@ class MiddlePipelineThread(PipelineThread): except: self.abort_all(sys.exc_info()) return - + # Pipeline is shutting down normally. self.out_queue.release() @@ -273,12 +274,12 @@ class LastPipelineThread(PipelineThread): with self.abort_lock: if self.abort_flag: return - + # Get the message from the previous stage. msg = self.in_queue.get() if msg is POISON: break - + with self.abort_lock: if self.abort_flag: return @@ -308,7 +309,7 @@ class Pipeline(object): self.stages.append((stage,)) else: self.stages.append(stage) - + def run_sequential(self): """Run the pipeline sequentially in the current thread. The stages are run one after the other. Only the first coroutine @@ -319,7 +320,7 @@ class Pipeline(object): # "Prime" the coroutines. for coro in coros[1:]: coro.next() - + # Begin the pipeline. for out in coros[0]: msgs = _allmsgs(out) @@ -329,7 +330,7 @@ class Pipeline(object): out = coro.send(msg) next_msgs.extend(_allmsgs(out)) msgs = next_msgs - + def run_parallel(self, queue_size=DEFAULT_QUEUE_SIZE): """Run the pipeline in parallel using one thread per stage. The messages between the stages are stored in queues of the given @@ -354,11 +355,11 @@ class Pipeline(object): threads.append( LastPipelineThread(coro, queues[-1], threads) ) - + # Start threads. for thread in threads: thread.start() - + # Wait for termination. 
The final thread lasts the longest. try: # Using a timeout allows us to receive KeyboardInterrupt @@ -371,7 +372,7 @@ class Pipeline(object): for thread in threads: thread.abort() raise - + finally: # Make completely sure that all the threads have finished # before we return. They should already be either finished, @@ -388,25 +389,25 @@ class Pipeline(object): # Smoke test. if __name__ == '__main__': import time - + # Test a normally-terminating pipeline both in sequence and # in parallel. def produce(): for i in range(5): - print 'generating %i' % i + print('generating %i' % i) time.sleep(1) yield i def work(): num = yield while True: - print 'processing %i' % num + print('processing %i' % num) time.sleep(2) num = yield num*2 def consume(): while True: num = yield time.sleep(1) - print 'received %i' % num + print('received %i' % num) ts_start = time.time() Pipeline([produce(), work(), consume()]).run_sequential() ts_seq = time.time() @@ -414,21 +415,21 @@ if __name__ == '__main__': ts_par = time.time() Pipeline([produce(), (work(), work()), consume()]).run_parallel() ts_end = time.time() - print 'Sequential time:', ts_seq - ts_start - print 'Parallel time:', ts_par - ts_seq - print 'Multiply-parallel time:', ts_end - ts_par - print + print('Sequential time:', ts_seq - ts_start) + print('Parallel time:', ts_par - ts_seq) + print('Multiply-parallel time:', ts_end - ts_par) + print() # Test a pipeline that raises an exception. 
def exc_produce(): for i in range(10): - print 'generating %i' % i + print('generating %i' % i) time.sleep(1) yield i def exc_work(): num = yield while True: - print 'processing %i' % num + print('processing %i' % num) time.sleep(3) if num == 3: raise Exception() @@ -438,5 +439,5 @@ if __name__ == '__main__': num = yield #if num == 4: # raise Exception() - print 'received %i' % num + print('received %i' % num) Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1) diff --git a/lib/beets/vfs.py b/lib/beets/vfs.py index 614bc8f5..815f8db3 100644 --- a/lib/beets/vfs.py +++ b/lib/beets/vfs.py @@ -8,7 +8,7 @@ # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: -# +# # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software.