From cf6a6a876f88813add36495f922ebd2e5d688452 Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Tue, 27 Jan 2015 22:26:35 +0100 Subject: [PATCH] Upgraded beets to 1.3.10, including patches --- lib/beets/LICENSE | 21 + lib/beets/README.rst | 94 ++ lib/beets/__init__.py | 9 +- lib/beets/autotag/__init__.py | 133 +-- lib/beets/autotag/hooks.py | 64 +- lib/beets/autotag/match.py | 142 ++- lib/beets/autotag/mb.py | 45 +- lib/beets/config_default.yaml | 7 + lib/beets/dbcore/__init__.py | 5 + lib/beets/dbcore/db.py | 343 ++++--- lib/beets/dbcore/query.py | 176 +++- lib/beets/dbcore/queryparse.py | 180 ++++ lib/beets/dbcore/types.py | 128 ++- lib/beets/importer.py | 1706 ++++++++++++++++++++------------ lib/beets/library.py | 887 +++++++++-------- lib/beets/mediafile.py | 705 ++++++++----- lib/beets/plugins.py | 116 ++- lib/beets/ui/__init__.py | 340 +++---- lib/beets/ui/commands.py | 715 ++++++++----- lib/beets/ui/migrate.py | 401 -------- lib/beets/util/__init__.py | 94 +- lib/beets/util/artresizer.py | 75 +- lib/beets/util/bluelet.py | 32 +- lib/beets/util/confit.py | 571 +++++++++-- lib/beets/util/enumeration.py | 176 +--- lib/beets/util/functemplate.py | 18 +- lib/beets/util/pipeline.py | 77 +- lib/beets/vfs.py | 2 + lib/beetsplug/embedart.py | 148 ++- lib/beetsplug/fetchart.py | 126 ++- lib/beetsplug/lyrics.py | 258 +++-- 31 files changed, 4792 insertions(+), 3002 deletions(-) create mode 100644 lib/beets/LICENSE create mode 100644 lib/beets/README.rst create mode 100644 lib/beets/dbcore/queryparse.py delete mode 100644 lib/beets/ui/migrate.py diff --git a/lib/beets/LICENSE b/lib/beets/LICENSE new file mode 100644 index 00000000..cddcf990 --- /dev/null +++ b/lib/beets/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2010-2014 Adrian Sampson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/lib/beets/README.rst b/lib/beets/README.rst new file mode 100644 index 00000000..8c64e244 --- /dev/null +++ b/lib/beets/README.rst @@ -0,0 +1,94 @@ +.. image:: https://travis-ci.org/sampsyo/beets.svg?branch=master + :target: https://travis-ci.org/sampsyo/beets + +.. image:: http://img.shields.io/coveralls/sampsyo/beets.svg + :target: https://coveralls.io/r/sampsyo/beets + +.. image:: http://img.shields.io/pypi/v/beets.svg + :target: https://pypi.python.org/pypi/beets + +Beets is the media library management system for obsessive-compulsive music +geeks. + +The purpose of beets is to get your music collection right once and for all. +It catalogs your collection, automatically improving its metadata as it goes. 
+It then provides a bouquet of tools for manipulating and accessing your music. + +Here's an example of beets' brainy tag corrector doing its thing:: + + $ beet import ~/music/ladytron + Tagging: + Ladytron - Witching Hour + (Similarity: 98.4%) + * Last One Standing -> The Last One Standing + * Beauty -> Beauty*2 + * White Light Generation -> Whitelightgenerator + * All the Way -> All the Way... + +Because beets is designed as a library, it can do almost anything you can +imagine for your music collection. Via `plugins`_, beets becomes a panacea: + +- Fetch or calculate all the metadata you could possibly need: `album art`_, + `lyrics`_, `genres`_, `tempos`_, `ReplayGain`_ levels, or `acoustic + fingerprints`_. +- Get metadata from `MusicBrainz`_, `Discogs`_, or `Beatport`_. Or guess + metadata using songs' filenames or their acoustic fingerprints. +- `Transcode audio`_ to any format you like. +- Check your library for `duplicate tracks and albums`_ or for `albums that + are missing tracks`_. +- Clean up crufty tags left behind by other, less-awesome tools. +- Embed and extract album art from files' metadata. +- Browse your music library graphically through a Web browser and play it in any + browser that supports `HTML5 Audio`_. +- Analyze music files' metadata from the command line. +- Listen to your library with a music player that speaks the `MPD`_ protocol + and works with a staggering variety of interfaces. + +If beets doesn't do what you want yet, `writing your own plugin`_ is +shockingly simple if you know a little Python. + +.. _plugins: http://beets.readthedocs.org/page/plugins/ +.. _MPD: http://www.musicpd.org/ +.. _MusicBrainz music collection: http://musicbrainz.org/doc/Collections/ +.. _writing your own plugin: + http://beets.readthedocs.org/page/dev/plugins.html +.. _HTML5 Audio: + http://www.w3.org/TR/html-markup/audio.html +.. _albums that are missing tracks: + http://beets.readthedocs.org/page/plugins/missing.html +.. _duplicate tracks and albums: + http://beets.readthedocs.org/page/plugins/duplicates.html +.. _Transcode audio: + http://beets.readthedocs.org/page/plugins/convert.html +.. _Beatport: http://www.beatport.com/ +.. _Discogs: http://www.discogs.com/ +.. _acoustic fingerprints: + http://beets.readthedocs.org/page/plugins/chroma.html +.. _ReplayGain: http://beets.readthedocs.org/page/plugins/replaygain.html +.. _tempos: http://beets.readthedocs.org/page/plugins/echonest.html +.. _genres: http://beets.readthedocs.org/page/plugins/lastgenre.html +.. _album art: http://beets.readthedocs.org/page/plugins/fetchart.html +.. _lyrics: http://beets.readthedocs.org/page/plugins/lyrics.html +.. _MusicBrainz: http://musicbrainz.org/ + +Read More +--------- + +Learn more about beets at `its Web site`_. Follow `@b33ts`_ on Twitter for +news and updates. + +You can install beets by typing ``pip install beets``. Then check out the +`Getting Started`_ guide. + +.. _its Web site: http://beets.radbox.org/ +.. _Getting Started: http://beets.readthedocs.org/page/guides/main.html +.. _@b33ts: http://twitter.com/b33ts/ + +Authors +------- + +Beets is by `Adrian Sampson`_ with a supporting cast of thousands. For help, +please contact the `mailing list`_. + +.. _mailing list: https://groups.google.com/forum/#!forum/beets-users +.. _Adrian Sampson: http://homes.cs.washington.edu/~asampson/ diff --git a/lib/beets/__init__.py b/lib/beets/__init__.py index 6010d4d3..d050a028 100644 --- a/lib/beets/__init__.py +++ b/lib/beets/__init__.py @@ -1,5 +1,5 @@ # This file is part of beets. 
-# Copyright 2013, Adrian Sampson. +# Copyright 2014, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -12,13 +12,14 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -# This particular version has been slightly modified to work with headphones +# This particular version has been slightly modified to work with Headphones # https://github.com/rembo10/headphones -import os -__version__ = '1.3.4' +__version__ = '1.3.10-headphones' __author__ = 'Adrian Sampson ' +import os + import beets.library from beets.util import confit diff --git a/lib/beets/autotag/__init__.py b/lib/beets/autotag/__init__.py index 371e6e44..7c517c60 100644 --- a/lib/beets/autotag/__init__.py +++ b/lib/beets/autotag/__init__.py @@ -14,135 +14,21 @@ """Facilities for automatically determining files' correct metadata. """ -import os import logging -import re -from beets import library, mediafile, config -from beets.util import sorted_walk, ancestry, displayable_path +from beets import config # Parts of external interface. -from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch -from .match import tag_item, tag_album -from .match import recommendation +from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch # noqa +from .match import tag_item, tag_album # noqa +from .match import Recommendation # noqa # Global logger. log = logging.getLogger('beets') -# Constants for directory walker. -MULTIDISC_MARKERS = (r'dis[ck]', r'cd') -MULTIDISC_PAT_FMT = r'^(.*%s[\W_]*)\d' - # Additional utilities for the main interface. -def albums_in_dir(path): - """Recursively searches the given directory and returns an iterable - of (paths, items) where paths is a list of directories and items is - a list of Items that is probably an album. Specifically, any folder - containing any media files is an album. - """ - collapse_pat = collapse_paths = collapse_items = None - - for root, dirs, files in sorted_walk(path, - ignore=config['ignore'].as_str_seq(), - logger=log): - # Get a list of items in the directory. - items = [] - for filename in files: - try: - i = library.Item.from_path(os.path.join(root, filename)) - except mediafile.FileTypeError: - pass - except mediafile.UnreadableFileError: - log.warn(u'unreadable file: {0}'.format( - displayable_path(filename)) - ) - else: - items.append(i) - - # If we're currently collapsing the constituent directories in a - # multi-disc album, check whether we should continue collapsing - # and add the current directory. If so, just add the directory - # and move on to the next directory. If not, stop collapsing. - if collapse_paths: - if (not collapse_pat and collapse_paths[0] in ancestry(root)) or \ - (collapse_pat and - collapse_pat.match(os.path.basename(root))): - # Still collapsing. - collapse_paths.append(root) - collapse_items += items - continue - else: - # Collapse finished. Yield the collapsed directory and - # proceed to process the current one. - if collapse_items: - yield collapse_paths, collapse_items - collapse_pat = collapse_paths = collapse_items = None - - # Check whether this directory looks like the *first* directory - # in a multi-disc sequence. There are two indicators: the file - # is named like part of a multi-disc sequence (e.g., "Title Disc - # 1") or it contains no items but only directories that are - # named in this way. 
- start_collapsing = False - for marker in MULTIDISC_MARKERS: - marker_pat = re.compile(MULTIDISC_PAT_FMT % marker, re.I) - match = marker_pat.match(os.path.basename(root)) - - # Is this directory the root of a nested multi-disc album? - if dirs and not items: - # Check whether all subdirectories have the same prefix. - start_collapsing = True - subdir_pat = None - for subdir in dirs: - # The first directory dictates the pattern for - # the remaining directories. - if not subdir_pat: - match = marker_pat.match(subdir) - if match: - subdir_pat = re.compile(r'^%s\d' % - re.escape(match.group(1)), re.I) - else: - start_collapsing = False - break - - # Subsequent directories must match the pattern. - elif not subdir_pat.match(subdir): - start_collapsing = False - break - - # If all subdirectories match, don't check other - # markers. - if start_collapsing: - break - - # Is this directory the first in a flattened multi-disc album? - elif match: - start_collapsing = True - # Set the current pattern to match directories with the same - # prefix as this one, followed by a digit. - collapse_pat = re.compile(r'^%s\d' % - re.escape(match.group(1)), re.I) - break - - # If either of the above heuristics indicated that this is the - # beginning of a multi-disc album, initialize the collapsed - # directory and item lists and check the next directory. - if start_collapsing: - # Start collapsing; continue to the next iteration. - collapse_paths = [root] - collapse_items = items - continue - - # If it's nonempty, yield it. - if items: - yield [root], items - - # Clear out any unfinished collapse. - if collapse_paths and collapse_items: - yield collapse_paths, collapse_items - def apply_item_metadata(item, track_info): """Set an item's metadata from its matched TrackInfo object. """ @@ -156,6 +42,7 @@ def apply_item_metadata(item, track_info): # At the moment, the other metadata is left intact (including album # and track number). Perhaps these should be emptied? + def apply_metadata(album_info, mapping): """Set the items' metadata to match an AlbumInfo object using a mapping from Items to TrackInfo objects. @@ -171,8 +58,8 @@ def apply_metadata(album_info, mapping): # Artist sort and credit names. item.artist_sort = track_info.artist_sort or album_info.artist_sort - item.artist_credit = track_info.artist_credit or \ - album_info.artist_credit + item.artist_credit = (track_info.artist_credit or + album_info.artist_credit) item.albumartist_sort = album_info.artist_sort item.albumartist_credit = album_info.artist_credit @@ -235,7 +122,6 @@ def apply_metadata(album_info, mapping): 'language', 'country', 'albumstatus', - 'media', 'albumdisambig'): value = getattr(album_info, field) if value is not None: @@ -243,5 +129,8 @@ def apply_metadata(album_info, mapping): if track_info.disctitle is not None: item.disctitle = track_info.disctitle + if track_info.media is not None: + item.media = track_info.media + # Headphones seal of approval - item.comments = 'tagged by headphones/beets' + item.comments = 'tagged by headphones/beets' \ No newline at end of file diff --git a/lib/beets/autotag/hooks.py b/lib/beets/autotag/hooks.py index 883703f2..beb3bd91 100644 --- a/lib/beets/autotag/hooks.py +++ b/lib/beets/autotag/hooks.py @@ -116,6 +116,7 @@ class AlbumInfo(object): for track in self.tracks: track.decode(codec) + class TrackInfo(object): """Describes a canonical track present on a release. Appears as part of an AlbumInfo's ``tracks`` list. 
Consists of these data members: @@ -126,6 +127,7 @@ class TrackInfo(object): - ``artist_id`` - ``length``: float: duration of the track in seconds - ``index``: position on the entire release + - ``media``: delivery mechanism (Vinyl, etc.) - ``medium``: the disc number this track appears on in the album - ``medium_index``: the track's position on the disc - ``medium_total``: the number of tracks on the item's disc @@ -140,13 +142,15 @@ class TrackInfo(object): def __init__(self, title, track_id, artist=None, artist_id=None, length=None, index=None, medium=None, medium_index=None, medium_total=None, artist_sort=None, disctitle=None, - artist_credit=None, data_source=None, data_url=None): + artist_credit=None, data_source=None, data_url=None, + media=None): self.title = title self.track_id = track_id self.artist = artist self.artist_id = artist_id self.length = length self.index = index + self.media = media self.medium = medium self.medium_index = medium_index self.medium_total = medium_total @@ -162,7 +166,7 @@ class TrackInfo(object): to Unicode. """ for fld in ['title', 'artist', 'medium', 'artist_sort', 'disctitle', - 'artist_credit']: + 'artist_credit', 'media']: value = getattr(self, fld) if isinstance(value, str): setattr(self, fld, value.decode(codec, 'ignore')) @@ -187,6 +191,7 @@ SD_REPLACE = [ (r'&', 'and'), ] + def _string_dist_basic(str1, str2): """Basic edit distance between two strings, ignoring non-alphanumeric characters and case. Comparisons are based on a @@ -201,13 +206,16 @@ def _string_dist_basic(str1, str2): return 0.0 return levenshtein(str1, str2) / float(max(len(str1), len(str2))) + def string_dist(str1, str2): """Gives an "intuitive" edit distance between two strings. This is an edit distance, normalized by the string length, with a number of tweaks that reflect intuition about text. """ - if str1 is None and str2 is None: return 0.0 - if str1 is None or str2 is None: return 1.0 + if str1 is None and str2 is None: + return 0.0 + if str1 is None or str2 is None: + return 1.0 str1 = str1.lower() str2 = str2.lower() @@ -217,9 +225,9 @@ def string_dist(str1, str2): # "something, the". for word in SD_END_WORDS: if str1.endswith(', %s' % word): - str1 = '%s %s' % (word, str1[:-len(word)-2]) + str1 = '%s %s' % (word, str1[:-len(word) - 2]) if str2.endswith(', %s' % word): - str2 = '%s %s' % (word, str2[:-len(word)-2]) + str2 = '%s %s' % (word, str2[:-len(word) - 2]) # Perform a couple of basic normalizing substitutions. for pat, repl in SD_REPLACE: @@ -256,6 +264,23 @@ def string_dist(str1, str2): return base_dist + penalty + +class LazyClassProperty(object): + """A decorator implementing a read-only property that is *lazy* in + the sense that the getter is only invoked once. Subsequent accesses + through *any* instance use the cached result. + """ + def __init__(self, getter): + self.getter = getter + self.computed = False + + def __get__(self, obj, owner): + if not self.computed: + self.value = self.getter(owner) + self.computed = True + return self.value + + class Distance(object): """Keeps track of multiple distance penalties. Provides a single weighted distance for all penalties as well as a weighted distance @@ -264,11 +289,15 @@ class Distance(object): def __init__(self): self._penalties = {} + @LazyClassProperty + def _weights(cls): + """A dictionary from keys to floating-point weights. 
+ """ weights_view = config['match']['distance_weights'] - self._weights = {} + weights = {} for key in weights_view.keys(): - self._weights[key] = weights_view[key].as_number() - + weights[key] = weights_view[key].as_number() + return weights # Access the components and their aggregates. @@ -313,8 +342,7 @@ class Distance(object): # Convert distance into a negative float we can sort items in # ascending order (for keys, when the penalty is equal) and # still get the items with the biggest distance first. - return sorted(list_, key=lambda (key, dist): (0-dist, key)) - + return sorted(list_, key=lambda (key, dist): (0 - dist, key)) # Behave like a float. @@ -323,13 +351,13 @@ class Distance(object): def __float__(self): return self.distance + def __sub__(self, other): return self.distance - other def __rsub__(self, other): return other - self.distance - # Behave like a dict. def __getitem__(self, key): @@ -355,11 +383,11 @@ class Distance(object): """ if not isinstance(dist, Distance): raise ValueError( - '`dist` must be a Distance object. It is: %r' % dist) + '`dist` must be a Distance object, not {0}'.format(type(dist)) + ) for key, penalties in dist._penalties.iteritems(): self._penalties.setdefault(key, []).extend(penalties) - # Adding components. def _eq(self, value1, value2): @@ -379,7 +407,8 @@ class Distance(object): """ if not 0.0 <= dist <= 1.0: raise ValueError( - '`dist` must be between 0.0 and 1.0. It is: %r' % dist) + '`dist` must be between 0.0 and 1.0, not {0}'.format(dist) + ) self._penalties.setdefault(key, []).append(dist) def add_equality(self, key, value, options): @@ -476,6 +505,7 @@ def album_for_mbid(release_id): except mb.MusicBrainzAPIError as exc: exc.log(log) + def track_for_mbid(recording_id): """Get a TrackInfo object for a MusicBrainz recording ID. Return None if the ID is not found. @@ -485,18 +515,21 @@ def track_for_mbid(recording_id): except mb.MusicBrainzAPIError as exc: exc.log(log) + def albums_for_id(album_id): """Get a list of albums for an ID.""" candidates = [album_for_mbid(album_id)] candidates.extend(plugins.album_for_id(album_id)) return filter(None, candidates) + def tracks_for_id(track_id): """Get a list of tracks for an ID.""" candidates = [track_for_mbid(track_id)] candidates.extend(plugins.track_for_id(track_id)) return filter(None, candidates) + def album_candidates(items, artist, album, va_likely): """Search for album matches. ``items`` is a list of Item objects that make up the album. ``artist`` and ``album`` are the respective @@ -525,6 +558,7 @@ def album_candidates(items, artist, album, va_likely): return out + def item_candidates(item, artist, title): """Search for item matches. ``item`` is the Item to be matched. ``artist`` and ``title`` are strings and either reflect the item or diff --git a/lib/beets/autotag/match.py b/lib/beets/autotag/match.py index a4bc47fa..2d1f2007 100644 --- a/lib/beets/autotag/match.py +++ b/lib/beets/autotag/match.py @@ -25,11 +25,8 @@ from munkres import Munkres from beets import plugins from beets import config from beets.util import plurality -from beets.util.enumeration import enum from beets.autotag import hooks - -# Recommendation enumeration. -recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation') +from beets.util.enumeration import OrderedEnum # Artist signals that indicate "various artists". 
These are used at the # album level to determine whether a given release is likely a VA @@ -41,6 +38,18 @@ VA_ARTISTS = (u'', u'various artists', u'various', u'va', u'unknown') log = logging.getLogger('beets') +# Recommendation enumeration. + +class Recommendation(OrderedEnum): + """Indicates a qualitative suggestion to the user about what should + be done with a given match. + """ + none = 0 + low = 1 + medium = 2 + strong = 3 + + # Primary matching functionality. def current_metadata(items): @@ -56,10 +65,10 @@ def current_metadata(items): fields = ['artist', 'album', 'albumartist', 'year', 'disctotal', 'mb_albumid', 'label', 'catalognum', 'country', 'media', 'albumdisambig'] - for key in fields: - values = [getattr(item, key) for item in items if item] - likelies[key], freq = plurality(values) - consensus[key] = (freq == len(values)) + for field in fields: + values = [item[field] for item in items if item] + likelies[field], freq = plurality(values) + consensus[field] = (freq == len(values)) # If there's an album artist consensus, use this for the artist. if consensus['albumartist'] and likelies['albumartist']: @@ -67,6 +76,7 @@ def current_metadata(items): return likelies, consensus + def assign_items(items, tracks): """Given a list of Items and a list of TrackInfo objects, find the best mapping between them. Returns a mapping from Items to TrackInfo @@ -93,12 +103,14 @@ def assign_items(items, tracks): extra_tracks.sort(key=lambda t: (t.index, t.title)) return mapping, extra_items, extra_tracks + def track_index_changed(item, track_info): """Returns True if the item and track info index is different. Tolerates per disc and per release numbering. """ return item.track not in (track_info.medium_index, track_info.index) + def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. Returns a Distance object. `incl_artist` indicates that a distance component should @@ -109,7 +121,7 @@ def track_distance(item, track_info, incl_artist=False): # Length. if track_info.length: diff = abs(item.length - track_info.length) - \ - config['match']['track_length_grace'].as_number() + config['match']['track_length_grace'].as_number() dist.add_ratio('track_length', diff, config['match']['track_length_max'].as_number()) @@ -134,6 +146,7 @@ def track_distance(item, track_info, incl_artist=False): return dist + def distance(items, album_info, mapping): """Determines how "significant" an album metadata change would be. Returns a Distance object. `album_info` is an AlbumInfo object @@ -239,6 +252,7 @@ def distance(items, album_info, mapping): return dist + def match_by_id(items): """If the items are tagged with a MusicBrainz album ID, returns an AlbumInfo object for the corresponding album. Otherwise, returns @@ -247,16 +261,17 @@ def match_by_id(items): # Is there a consensus on the MB album ID? albumids = [item.mb_albumid for item in items if item.mb_albumid] if not albumids: - log.debug('No album IDs found.') + log.debug(u'No album IDs found.') return None # If all album IDs are equal, look up the album. 
- if bool(reduce(lambda x,y: x if x==y else (), albumids)): + if bool(reduce(lambda x, y: x if x == y else (), albumids)): albumid = albumids[0] - log.debug('Searching for discovered album ID: ' + albumid) + log.debug(u'Searching for discovered album ID: {0}'.format(albumid)) return hooks.album_for_mbid(albumid) else: - log.debug('No album ID consensus.') + log.debug(u'No album ID consensus.') + def _recommendation(results): """Given a sorted list of AlbumMatch or TrackMatch objects, return a @@ -268,26 +283,26 @@ def _recommendation(results): """ if not results: # No candidates: no recommendation. - return recommendation.none + return Recommendation.none # Basic distance thresholding. min_dist = results[0].distance if min_dist < config['match']['strong_rec_thresh'].as_number(): # Strong recommendation level. - rec = recommendation.strong + rec = Recommendation.strong elif min_dist <= config['match']['medium_rec_thresh'].as_number(): # Medium recommendation level. - rec = recommendation.medium + rec = Recommendation.medium elif len(results) == 1: # Only a single candidate. - rec = recommendation.low + rec = Recommendation.low elif results[1].distance - min_dist >= \ config['match']['rec_gap_thresh'].as_number(): # Gap between first two candidates is large. - rec = recommendation.low + rec = Recommendation.low else: # No conclusion. Return immediately. Can't be downgraded any further. - return recommendation.none + return Recommendation.none # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. @@ -299,28 +314,40 @@ def _recommendation(results): for key in keys: if key in max_rec_view.keys(): max_rec = max_rec_view[key].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, + 'strong': Recommendation.strong, + 'medium': Recommendation.medium, + 'low': Recommendation.low, + 'none': Recommendation.none, }) rec = min(rec, max_rec) return rec + def _add_candidate(items, results, info): """Given a candidate AlbumInfo object, attempt to add the candidate to the output dictionary of AlbumMatch objects. This involves checking the track count, ordering the items, checking for duplicates, and calculating the distance. """ - log.debug('Candidate: %s - %s' % (info.artist, info.album)) + log.debug(u'Candidate: {0} - {1}'.format(info.artist, info.album)) + + # Discard albums with zero tracks. + if not info.tracks: + log.debug('No tracks.') + return # Don't duplicate. if info.album_id in results: - log.debug('Duplicate.') + log.debug(u'Duplicate.') return + # Discard matches without required tags. + for req_tag in config['match']['required'].as_str_seq(): + if getattr(info, req_tag) is None: + log.debug(u'Ignored. Missing required tag: {0}'.format(req_tag)) + return + # Find mapping between the items and the track info. mapping, extra_items, extra_tracks = assign_items(items, info.tracks) @@ -331,30 +358,36 @@ def _add_candidate(items, results, info): penalties = [key for _, key in dist] for penalty in config['match']['ignored'].as_str_seq(): if penalty in penalties: - log.debug('Ignored. Penalty: %s' % penalty) + log.debug(u'Ignored. Penalty: {0}'.format(penalty)) return - log.debug('Success. Distance: %f' % dist) + log.debug(u'Success. 
Distance: {0}'.format(dist)) results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, extra_items, extra_tracks) + def tag_album(items, search_artist=None, search_album=None, search_id=None): - """Bundles together the functionality used to infer tags for a - set of items comprised by an album. Returns everything relevant: - - The current artist. - - The current album. - - A list of AlbumMatch objects. The candidates are sorted by - distance (i.e., best match first). - - A recommendation. - If search_artist and search_album or search_id are provided, then - they are used as search terms in place of the current metadata. + """Return a tuple of a artist name, an album name, a list of + `AlbumMatch` candidates from the metadata backend, and a + `Recommendation`. + + The artist and album are the most common values of these fields + among `items`. + + The `AlbumMatch` objects are generated by searching the metadata + backends. By default, the metadata of the items is used for the + search. This can be customized by setting the parameters. The + `mapping` field of the album has the matched `items` as keys. + + The recommendation is calculated from the match qualitiy of the + candidates. """ # Get current metadata. likelies, consensus = current_metadata(items) cur_artist = likelies['artist'] cur_album = likelies['album'] - log.debug('Tagging %s - %s' % (cur_artist, cur_album)) + log.debug(u'Tagging {0} - {1}'.format(cur_artist, cur_album)) # The output result (distance, AlbumInfo) tuples (keyed by MB album # ID). @@ -362,7 +395,7 @@ def tag_album(items, search_artist=None, search_album=None, # Search by explicit ID. if search_id is not None: - log.debug('Searching for album ID: ' + search_id) + log.debug(u'Searching for album ID: {0}'.format(search_id)) search_cands = hooks.albums_for_id(search_id) # Use existing metadata or text search. @@ -372,32 +405,33 @@ def tag_album(items, search_artist=None, search_album=None, if id_info: _add_candidate(items, candidates, id_info) rec = _recommendation(candidates.values()) - log.debug('Album ID match recommendation is ' + str(rec)) + log.debug(u'Album ID match recommendation is {0}'.format(str(rec))) if candidates and not config['import']['timid']: # If we have a very good MBID match, return immediately. # Otherwise, this match will compete against metadata-based # matches. - if rec == recommendation.strong: - log.debug('ID match.') + if rec == Recommendation.strong: + log.debug(u'ID match.') return cur_artist, cur_album, candidates.values(), rec # Search terms. if not (search_artist and search_album): # No explicit search terms -- use current metadata. search_artist, search_album = cur_artist, cur_album - log.debug(u'Search terms: %s - %s' % (search_artist, search_album)) + log.debug(u'Search terms: {0} - {1}'.format(search_artist, + search_album)) # Is this album likely to be a "various artist" release? va_likely = ((not consensus['artist']) or - (search_artist.lower() in VA_ARTISTS) or - any(item.comp for item in items)) - log.debug(u'Album might be VA: %s' % str(va_likely)) + (search_artist.lower() in VA_ARTISTS) or + any(item.comp for item in items)) + log.debug(u'Album might be VA: {0}'.format(str(va_likely))) # Get the results from the data sources. search_cands = hooks.album_candidates(items, search_artist, search_album, va_likely) - log.debug(u'Evaluating %i candidates.' 
% len(search_cands)) + log.debug(u'Evaluating {0} candidates.'.format(len(search_cands))) for info in search_cands: _add_candidate(items, candidates, info) @@ -406,6 +440,7 @@ def tag_album(items, search_artist=None, search_album=None, rec = _recommendation(candidates) return cur_artist, cur_album, candidates, rec + def tag_item(item, search_artist=None, search_title=None, search_id=None): """Attempts to find metadata for a single track. Returns a @@ -421,15 +456,15 @@ def tag_item(item, search_artist=None, search_title=None, # First, try matching by MusicBrainz ID. trackid = search_id or item.mb_trackid if trackid: - log.debug('Searching for track ID: ' + trackid) + log.debug(u'Searching for track ID: {0}'.format(trackid)) for track_info in hooks.tracks_for_id(trackid): dist = track_distance(item, track_info, incl_artist=True) candidates[track_info.track_id] = \ - hooks.TrackMatch(dist, track_info) + hooks.TrackMatch(dist, track_info) # If this is a good match, then don't keep searching. rec = _recommendation(candidates.values()) - if rec == recommendation.strong and not config['import']['timid']: - log.debug('Track ID match.') + if rec == Recommendation.strong and not config['import']['timid']: + log.debug(u'Track ID match.') return candidates.values(), rec # If we're searching by ID, don't proceed. @@ -437,12 +472,13 @@ def tag_item(item, search_artist=None, search_title=None, if candidates: return candidates.values(), rec else: - return [], recommendation.none + return [], Recommendation.none # Search terms. if not (search_artist and search_title): search_artist, search_title = item.artist, item.title - log.debug(u'Item search terms: %s - %s' % (search_artist, search_title)) + log.debug(u'Item search terms: {0} - {1}'.format(search_artist, + search_title)) # Get and evaluate candidate metadata. for track_info in hooks.item_candidates(item, search_artist, search_title): @@ -450,7 +486,7 @@ def tag_item(item, search_artist=None, search_title=None, candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info) # Sort by distance and return with recommendation. - log.debug('Found %i candidates.' % len(candidates)) + log.debug(u'Found {0} candidates.'.format(len(candidates))) candidates = sorted(candidates.itervalues()) rec = _recommendation(candidates) return candidates, rec diff --git a/lib/beets/autotag/mb.py b/lib/beets/autotag/mb.py index 779ec4b3..d063f627 100644 --- a/lib/beets/autotag/mb.py +++ b/lib/beets/autotag/mb.py @@ -32,6 +32,7 @@ BASE_URL = 'http://musicbrainz.org/' musicbrainzngs.set_useragent('beets', beets.__version__, 'http://beets.radbox.org/') + class MusicBrainzAPIError(util.HumanReadableException): """An error while talking to MusicBrainz. The `query` field is the parameter to the action and may have any type. @@ -41,7 +42,7 @@ class MusicBrainzAPIError(util.HumanReadableException): super(MusicBrainzAPIError, self).__init__(reason, verb, tb) def get_message(self): - return u'"{0}" in {1} with query {2}'.format( + return u'{0} in {1} with query {2}'.format( self._reasonstr(), self.verb, repr(self.query) ) @@ -51,12 +52,15 @@ RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 'labels', 'artist-credits', 'aliases'] TRACK_INCLUDES = ['artists', 'aliases'] + def track_url(trackid): return urljoin(BASE_URL, 'recording/' + trackid) + def album_url(albumid): return urljoin(BASE_URL, 'release/' + albumid) + def configure(): """Set up the python-musicbrainz-ngs module according to settings from the beets configuration. 
This should be called at startup. @@ -67,6 +71,7 @@ def configure(): config['musicbrainz']['ratelimit'].get(int), ) + def _preferred_alias(aliases): """Given an list of alias structures for an artist credit, select and return the user's preferred alias alias or None if no matching @@ -81,13 +86,15 @@ def _preferred_alias(aliases): # Search configured locales in order. for locale in config['import']['languages'].as_str_seq(): # Find matching primary aliases for this locale. - matches = [a for a in aliases if a['locale'] == locale and 'primary' in a] + matches = [a for a in aliases + if a['locale'] == locale and 'primary' in a] # Skip to the next locale if we have no matches if not matches: continue return matches[0] + def _flatten_artist_credit(credit): """Given a list representing an ``artist-credit`` block, flatten the data into a triple of joined artist name strings: canonical, sort, and @@ -133,6 +140,7 @@ def _flatten_artist_credit(credit): ''.join(artist_credit_parts), ) + def track_info(recording, index=None, medium=None, medium_index=None, medium_total=None): """Translates a MusicBrainz recording result dictionary into a beets @@ -167,6 +175,7 @@ def track_info(recording, index=None, medium=None, medium_index=None, info.decode() return info + def _set_date_str(info, date_str, original=False): """Given a (possibly partial) YYYY-MM-DD string and an AlbumInfo object, set the object's release date fields appropriately. If @@ -186,6 +195,7 @@ def _set_date_str(info, date_str, original=False): key = 'original_' + key setattr(info, key, date_num) + def album_info(release): """Takes a MusicBrainz release result dictionary and returns a beets AlbumInfo object containing the interesting data about that release. @@ -199,6 +209,7 @@ def album_info(release): index = 0 for medium in release['medium-list']: disctitle = medium.get('title') + format = medium.get('format') for track in medium['track-list']: # Basic information from the recording. index += 1 @@ -210,6 +221,7 @@ def album_info(release): len(medium['track-list']), ) ti.disctitle = disctitle + ti.media = format # Prefer track data, where present, over recording data. if track.get('title'): @@ -288,6 +300,7 @@ def album_info(release): info.decode() return info + def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): """Searches for a single album ("release" in MusicBrainz parlance) and returns an iterator over AlbumInfo objects. May raise a @@ -297,9 +310,9 @@ def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): optionally, a number of tracks on the album. """ # Build search criteria. - criteria = {'release': album.lower()} + criteria = {'release': album.lower().strip()} if artist is not None: - criteria['artist'] = artist.lower() + criteria['artist'] = artist.lower().strip() else: # Various Artists search. criteria['arid'] = VARIOUS_ARTISTS_ID @@ -322,13 +335,14 @@ def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): if albuminfo is not None: yield albuminfo + def match_track(artist, title, limit=SEARCH_LIMIT): """Searches for a single track and returns an iterable of TrackInfo objects. May raise a MusicBrainzAPIError. 
""" criteria = { - 'artist': artist.lower(), - 'recording': title.lower(), + 'artist': artist.lower().strip(), + 'recording': title.lower().strip(), } if not any(criteria.itervalues()): @@ -342,6 +356,7 @@ def match_track(artist, title, limit=SEARCH_LIMIT): for recording in res['recording-list']: yield track_info(recording) + def _parse_id(s): """Search for a MusicBrainz ID in the given string and return it. If no ID can be found, return None. @@ -351,38 +366,40 @@ def _parse_id(s): if match: return match.group() -def album_for_id(albumid): + +def album_for_id(releaseid): """Fetches an album by its MusicBrainz ID and returns an AlbumInfo object or None if the album is not found. May raise a MusicBrainzAPIError. """ - albumid = _parse_id(albumid) + albumid = _parse_id(releaseid) if not albumid: - log.error('Invalid MBID.') + log.debug(u'Invalid MBID ({0}).'.format(releaseid)) return try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) except musicbrainzngs.ResponseError: - log.debug('Album ID match failed.') + log.debug(u'Album ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: raise MusicBrainzAPIError(exc, 'get release by ID', albumid, traceback.format_exc()) return album_info(res['release']) -def track_for_id(trackid): + +def track_for_id(releaseid): """Fetches a track by its MusicBrainz ID. Returns a TrackInfo object or None if no track is found. May raise a MusicBrainzAPIError. """ - trackid = _parse_id(trackid) + trackid = _parse_id(releaseid) if not trackid: - log.error('Invalid MBID.') + log.debug(u'Invalid MBID ({0}).'.format(releaseid)) return try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) except musicbrainzngs.ResponseError: - log.debug('Track ID match failed.') + log.debug(u'Track ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: raise MusicBrainzAPIError(exc, 'get recording by ID', trackid, diff --git a/lib/beets/config_default.yaml b/lib/beets/config_default.yaml index d35368ea..78f16d05 100644 --- a/lib/beets/config_default.yaml +++ b/lib/beets/config_default.yaml @@ -5,6 +5,7 @@ import: write: yes copy: yes move: no + link: no delete: no resume: ask incremental: no @@ -20,6 +21,7 @@ import: detail: no flat: no group_albums: no + pretend: false clutter: ["Thumbs.DB", ".DS_Store"] ignore: [".*", "*~", "System Volume Information"] @@ -32,6 +34,7 @@ replace: '\s+$': '' '^\s+': '' path_sep_replace: _ +asciify_paths: false art_filename: cover max_filename_length: 0 @@ -54,6 +57,9 @@ list_format_item: $artist - $album - $title list_format_album: $albumartist - $album time_format: '%Y-%m-%d %H:%M:%S' +sort_album: albumartist+ album+ +sort_item: artist+ album+ disc+ track+ + paths: default: $albumartist/$album%aunique{}/$track $title singleton: Non-Album/$artist/$title @@ -98,5 +104,6 @@ match: media: [] original_year: no ignored: [] + required: [] track_length_grace: 10 track_length_max: 30 diff --git a/lib/beets/dbcore/__init__.py b/lib/beets/dbcore/__init__.py index b4f80fb9..c364fdfc 100644 --- a/lib/beets/dbcore/__init__.py +++ b/lib/beets/dbcore/__init__.py @@ -18,3 +18,8 @@ Library. 
from .db import Model, Database from .query import Query, FieldQuery, MatchQuery, AndQuery, OrQuery from .types import Type +from .queryparse import query_from_strings +from .queryparse import sort_from_strings +from .queryparse import parse_sorted_query + +# flake8: noqa diff --git a/lib/beets/dbcore/db.py b/lib/beets/dbcore/db.py index cbdaf5a7..0c786daa 100644 --- a/lib/beets/dbcore/db.py +++ b/lib/beets/dbcore/db.py @@ -20,16 +20,62 @@ from collections import defaultdict import threading import sqlite3 import contextlib +import collections import beets from beets.util.functemplate import Template -from .query import MatchQuery +from beets.dbcore import types +from .query import MatchQuery, NullSort, TrueQuery +class FormattedMapping(collections.Mapping): + """A `dict`-like formatted view of a model. + + The accessor `mapping[key]` returns the formated version of + `model[key]` as a unicode string. + + If `for_path` is true, all path separators in the formatted values + are replaced. + """ + + def __init__(self, model, for_path=False): + self.for_path = for_path + self.model = model + self.model_keys = model.keys(True) + + def __getitem__(self, key): + if key in self.model_keys: + return self._get_formatted(self.model, key) + else: + raise KeyError(key) + + def __iter__(self): + return iter(self.model_keys) + + def __len__(self): + return len(self.model_keys) + + def get(self, key, default=None): + if default is None: + default = self.model._type(key).format(None) + return super(FormattedMapping, self).get(key, default) + + def _get_formatted(self, model, key): + value = model._type(key).format(model.get(key)) + if isinstance(value, bytes): + value = value.decode('utf8', 'ignore') + + if self.for_path: + sep_repl = beets.config['path_sep_replace'].get(unicode) + for sep in (os.path.sep, os.path.altsep): + if sep: + value = value.replace(sep, sep_repl) + + return value + # Abstract base for model classes. - class Model(object): """An abstract object representing an object in the database. Model objects act like dictionaries (i.e., the allow subscript access like @@ -66,12 +112,7 @@ class Model(object): _fields = {} """A mapping indicating available "fixed" fields on this type. The - keys are field names and the values are Type objects. - """ - - _bytes_keys = () - """Keys whose values should be stored as raw bytes blobs rather than - strings. + keys are field names and the values are `Type` objects. """ _search_fields = () @@ -79,6 +120,21 @@ class Model(object): terms. """ + _types = {} + """Optional Types for non-fixed (i.e., flexible and computed) fields. + """ + + _sorts = {} + """Optional named sort criteria. The keys are strings and the values + are subclasses of `Sort`. + """ + + _always_dirty = False + """By default, fields only become "dirty" when their value actually + changes. Enabling this flag marks fields as dirty even when the new + value is the same as the old value (e.g., `o.f = o.f`). + """ + @classmethod def _getters(cls): """Return a mapping from field names to getter functions. @@ -94,7 +150,6 @@ class Model(object): # As above: we could consider caching this result. raise NotImplementedError() - # Basic operation. def __init__(self, db=None, **values): @@ -110,6 +165,20 @@ class Model(object): self.update(values) self.clear_dirty() + @classmethod + def _awaken(cls, db=None, fixed_values={}, flex_values={}): + """Create an object with values drawn from the database. + + This is a performance optimization: the checks involved with + ordinary construction are bypassed. 
+ """ + obj = cls(db) + for key, value in fixed_values.iteritems(): + obj._values_fixed[key] = cls._type(key).from_sql(value) + for key, value in flex_values.iteritems(): + obj._values_flex[key] = cls._type(key).from_sql(value) + return obj + def __repr__(self): return '{0}({1})'.format( type(self).__name__, @@ -132,9 +201,17 @@ class Model(object): if need_id and not self.id: raise ValueError('{0} has no id'.format(type(self).__name__)) - # Essential field accessors. + @classmethod + def _type(self, key): + """Get the type of a field, a `Type` instance. + + If the field has no explicit type, it is given the base `Type`, + which does no conversion. + """ + return self._fields.get(key) or self._types.get(key) or types.DEFAULT + def __getitem__(self, key): """Get the value for a field. Raise a KeyError if the field is not available. @@ -152,11 +229,19 @@ class Model(object): def __setitem__(self, key, value): """Assign the value for a field. """ - source = self._values_fixed if key in self._fields \ - else self._values_flex + # Choose where to place the value. + if key in self._fields: + source = self._values_fixed + else: + source = self._values_flex + + # If the field has a type, filter the value. + value = self._type(key).normalize(value) + + # Assign value and possibly mark as dirty. old_value = source.get(key) source[key] = value - if old_value != value: + if self._always_dirty or old_value != value: self._dirty.add(key) def __delitem__(self, key): @@ -183,7 +268,6 @@ class Model(object): else: return base_keys - # Act like a dictionary. def update(self, values): @@ -219,7 +303,6 @@ class Model(object): """ return iter(self.keys()) - # Convenient attribute access. def __getattr__(self, key): @@ -243,7 +326,6 @@ class Model(object): else: del self[key] - # Database interaction (CRUD methods). def store(self): @@ -252,19 +334,15 @@ class Model(object): self._check_db() # Build assignments for query. - assignments = '' + assignments = [] subvars = [] for key in self._fields: if key != 'id' and key in self._dirty: self._dirty.remove(key) - assignments += key + '=?,' - value = self[key] - # Wrap path strings in buffers so they get stored - # "in the raw". - if key in self._bytes_keys and isinstance(value, str): - value = buffer(value) + assignments.append(key + '=?') + value = self._type(key).to_sql(self[key]) subvars.append(value) - assignments = assignments[:-1] # Knock off last , + assignments = ','.join(assignments) with self._db.transaction() as tx: # Main table update. @@ -302,6 +380,8 @@ class Model(object): self._check_db() stored_obj = self._db._get(type(self), self.id) assert stored_obj is not None, "object {0} not in DB".format(self.id) + self._values_fixed = {} + self._values_flex = {} self.update(dict(stored_obj)) self.clear_dirty() @@ -344,76 +424,26 @@ class Model(object): self._dirty.add(key) self.store() - # Formatting and templating. - @classmethod - def _format(cls, key, value, for_path=False): - """Format a value as the given field for this model. - """ - # Format the value as a string according to its type, if any. - if key in cls._fields: - value = cls._fields[key].format(value) - # Formatting must result in a string. To deal with - # Python2isms, implicitly convert ASCII strings. - assert isinstance(value, basestring), \ - u'field formatter must produce strings' - if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') + _formatter = FormattedMapping - elif not isinstance(value, unicode): - # Fallback formatter. Convert to unicode at all cost. 
- if value is None: - value = u'' - elif isinstance(value, basestring): - if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') - else: - value = unicode(value) - - if for_path: - sep_repl = beets.config['path_sep_replace'].get(unicode) - for sep in (os.path.sep, os.path.altsep): - if sep: - value = value.replace(sep, sep_repl) - - return value - - def _get_formatted(self, key, for_path=False): - """Get a field value formatted as a string (`unicode` object) - for display to the user. If `for_path` is true, then the value - will be sanitized for inclusion in a pathname (i.e., path - separators will be removed from the value). - """ - return self._format(key, self.get(key), for_path) - - def _formatted_mapping(self, for_path=False): + def formatted(self, for_path=False): """Get a mapping containing all values on this object formatted - as human-readable strings. + as human-readable unicode strings. """ - # In the future, this could be made "lazy" to avoid computing - # fields unnecessarily. - out = {} - for key in self.keys(True): - out[key] = self._get_formatted(key, for_path) - return out + return self._formatter(self, for_path) def evaluate_template(self, template, for_path=False): """Evaluate a template (a string or a `Template` object) using the object's fields. If `for_path` is true, then no new path separators will be added to the template. """ - # Build value mapping. - mapping = self._formatted_mapping(for_path) - - # Get template functions. - funcs = self._template_funcs() - # Perform substitution. if isinstance(template, basestring): template = Template(template) - return template.substitute(mapping, funcs) - + return template.substitute(self.formatted(for_path), + self._template_funcs()) # Parsing. @@ -424,63 +454,117 @@ class Model(object): if not isinstance(string, basestring): raise TypeError("_parse() argument must be a string") - typ = cls._fields.get(key) - if typ: - return typ.parse(string) - else: - # Fall back to unparsed string. - return string - + return cls._type(key).parse(string) # Database controller and supporting interfaces. - class Results(object): """An item query result set. Iterating over the collection lazily constructs LibModel objects that reflect database rows. """ - def __init__(self, model_class, rows, db, query=None): + def __init__(self, model_class, rows, db, query=None, sort=None): """Create a result set that will construct objects of type - `model_class`, which should be a subclass of `LibModel`, out of - the query result mapping in `rows`. The new objects are - associated with the database `db`. If `query` is provided, it is - used as a predicate to filter the results for a "slow query" that - cannot be evaluated by the database directly. + `model_class`. + + `model_class` is a subclass of `LibModel` that will be + constructed. `rows` is a query result: a list of mappings. The + new objects will be associated with the database `db`. + + If `query` is provided, it is used as a predicate to filter the + results for a "slow query" that cannot be evaluated by the + database directly. If `sort` is provided, it is used to sort the + full list of results before returning. This means it is a "slow + sort" and all objects must be built before returning the first + one. """ self.model_class = model_class self.rows = rows self.db = db self.query = query + self.sort = sort + + # We keep a queue of rows we haven't yet consumed for + # materialization. We preserve the original total number of + # rows. 
+ self._rows = rows + self._row_count = len(rows) + + # The materialized objects corresponding to rows that have been + # consumed. + self._objects = [] + + def _get_objects(self): + """Construct and generate Model objects for they query. The + objects are returned in the order emitted from the database; no + slow sort is applied. + + For performance, this generator caches materialized objects to + avoid constructing them more than once. This way, iterating over + a `Results` object a second time should be much faster than the + first. + """ + index = 0 # Position in the materialized objects. + while index < len(self._objects) or self._rows: + # Are there previously-materialized objects to produce? + if index < len(self._objects): + yield self._objects[index] + index += 1 + + # Otherwise, we consume another row, materialize its object + # and produce it. + else: + while self._rows: + row = self._rows.pop(0) + obj = self._make_model(row) + # If there is a slow-query predicate, ensurer that the + # object passes it. + if not self.query or self.query.match(obj): + self._objects.append(obj) + index += 1 + yield obj + break def __iter__(self): - """Construct Python objects for all rows that pass the query - predicate. + """Construct and generate Model objects for all matching + objects, in sorted order. """ - for row in self.rows: - # Get the flexible attributes for the object. - with self.db.transaction() as tx: - flex_rows = tx.query( - 'SELECT * FROM {0} WHERE entity_id=?'.format( - self.model_class._flex_table - ), - (row['id'],) - ) - values = dict(row) - values.update( - dict((row['key'], row['value']) for row in flex_rows) + if self.sort: + # Slow sort. Must build the full list first. + objects = self.sort.sort(list(self._get_objects())) + return iter(objects) + + else: + # Objects are pre-sorted (i.e., by the database). + return self._get_objects() + + def _make_model(self, row): + # Get the flexible attributes for the object. + with self.db.transaction() as tx: + flex_rows = tx.query( + 'SELECT * FROM {0} WHERE entity_id=?'.format( + self.model_class._flex_table + ), + (row['id'],) ) - # Construct the Python object and yield it if it passes the - # predicate. - obj = self.model_class(self.db, **values) - if not self.query or self.query.match(obj): - yield obj + cols = dict(row) + values = dict((k, v) for (k, v) in cols.items() + if not k[:4] == 'flex') + flex_values = dict((row['key'], row['value']) for row in flex_rows) + + # Construct the Python object + obj = self.model_class._awaken(self.db, values, flex_values) + return obj def __len__(self): """Get the number of matching objects. """ - if self.query: + if not self._rows: + # Fully materialized. Just count the objects. + return len(self._objects) + + elif self.query: # A slow query. Fall back to testing every object. count = 0 for obj in self: @@ -489,7 +573,7 @@ class Results(object): else: # A fast query. Just count the rows. - return len(self.rows) + return self._row_count def __nonzero__(self): """Does this result contain any objects? @@ -500,6 +584,11 @@ class Results(object): """Get the nth item in this result set. This is inefficient: all items up to n are materialized and thrown away. """ + if not self._rows and not self.sort: + # Fully materialized and already in order. Just look up the + # object. 
+ return self._objects[n] + it = iter(self) try: for i in range(n): @@ -604,7 +693,6 @@ class Database(object): self._make_table(model_cls._table, model_cls._fields) self._make_attribute_table(model_cls._flex_table) - # Primitive access control: connections and transactions. def _connection(self): @@ -644,7 +732,6 @@ class Database(object): """ return Transaction(self) - # Schema setup and migration. def _make_table(self, table, fields): @@ -698,27 +785,33 @@ class Database(object): ON {0} (entity_id); """.format(flex_table)) - # Querying. - def _fetch(self, model_cls, query, order_by=None): + def _fetch(self, model_cls, query=None, sort=None): """Fetch the objects of type `model_cls` matching the given query. The query may be given as a string, string sequence, a - Query object, or None (to fetch everything). If provided, - `order_by` is a SQLite ORDER BY clause for sorting. + Query object, or None (to fetch everything). `sort` is an + `Sort` object. """ + query = query or TrueQuery() # A null query. + sort = sort or NullSort() # Unsorted. where, subvals = query.clause() + order_by = sort.order_clause() - sql = "SELECT * FROM {0} WHERE {1}".format( + sql = ("SELECT * FROM {0} WHERE {1} {2}").format( model_cls._table, where or '1', + "ORDER BY {0}".format(order_by) if order_by else '', ) - if order_by: - sql += " ORDER BY {0}".format(order_by) + with self.transaction() as tx: rows = tx.query(sql, subvals) - return Results(model_cls, rows, self, None if where else query) + return Results( + model_cls, rows, self, + None if where else query, # Slow query component. + sort if sort.is_slow() else None, # Slow sort component. + ) def _get(self, model_cls, id): """Get a Model object by its id or None if the id does not diff --git a/lib/beets/dbcore/query.py b/lib/beets/dbcore/query.py index 4c888302..5a116eb2 100644 --- a/lib/beets/dbcore/query.py +++ b/lib/beets/dbcore/query.py @@ -15,6 +15,7 @@ """The Query type hierarchy for DBCore. """ import re +from operator import attrgetter from beets import util from datetime import datetime, timedelta @@ -82,6 +83,23 @@ class MatchQuery(FieldQuery): return pattern == value +class NoneQuery(FieldQuery): + + def __init__(self, field, fast=True): + self.field = field + self.fast = fast + + def col_clause(self): + return self.field + " IS NULL", () + + @classmethod + def match(self, item): + try: + return item[self.field] is None + except KeyError: + return True + + class StringFieldQuery(FieldQuery): """A FieldQuery that converts values to strings before matching them. @@ -104,8 +122,11 @@ class StringFieldQuery(FieldQuery): class SubstringQuery(StringFieldQuery): """A query that matches a substring in a specific item field.""" def col_clause(self): - search = '%' + (self.pattern.replace('\\','\\\\').replace('%','\\%') - .replace('_','\\_')) + '%' + pattern = (self.pattern + .replace('\\', '\\\\') + .replace('%', '\\%') + .replace('_', '\\_')) + search = '%' + pattern + '%' clause = self.field + " like ? escape '\\'" subvals = [search] return clause, subvals @@ -200,7 +221,9 @@ class NumericQuery(FieldQuery): self.rangemax = self._convert(parts[1]) def match(self, item): - value = getattr(item, self.field) + if self.field not in item: + return False + value = item[self.field] if isinstance(value, basestring): value = self._convert(value) @@ -236,12 +259,16 @@ class CollectionQuery(Query): self.subqueries = subqueries # Act like a sequence. 
+ def __len__(self): return len(self.subqueries) + def __getitem__(self, key): return self.subqueries[key] + def __iter__(self): return iter(self.subqueries) + def __contains__(self, item): return item in self.subqueries @@ -334,10 +361,8 @@ class FalseQuery(Query): return False - # Time/date queries. - def _to_epoch_time(date): """Convert a `datetime` object to an integer number of seconds since the (local) Unix epoch. @@ -393,10 +418,14 @@ class Period(object): return None ordinal = string.count('-') if ordinal >= len(cls.date_formats): - raise ValueError('date is not in one of the formats ' - + ', '.join(cls.date_formats)) + # Too many components. + return None date_format = cls.date_formats[ordinal] - date = datetime.strptime(string, date_format) + try: + date = datetime.strptime(string, date_format) + except ValueError: + # Parsing failed. + return None precision = cls.precisions[ordinal] return cls(date, precision) @@ -492,3 +521,134 @@ class DateQuery(FieldQuery): # Match any date. clause = '1' return clause, subvals + + +# Sorting. + +class Sort(object): + """An abstract class representing a sort operation for a query into + the item database. + """ + + def order_clause(self): + """Generates a SQL fragment to be used in a ORDER BY clause, or + None if no fragment is used (i.e., this is a slow sort). + """ + return None + + def sort(self, items): + """Sort the list of objects and return a list. + """ + return sorted(items) + + def is_slow(self): + """Indicate whether this query is *slow*, meaning that it cannot + be executed in SQL and must be executed in Python. + """ + return False + + +class MultipleSort(Sort): + """Sort that encapsulates multiple sub-sorts. + """ + + def __init__(self, sorts=None): + self.sorts = sorts or [] + + def add_sort(self, sort): + self.sorts.append(sort) + + def _sql_sorts(self): + """Return the list of sub-sorts for which we can be (at least + partially) fast. + + A contiguous suffix of fast (SQL-capable) sub-sorts are + executable in SQL. The remaining, even if they are fast + independently, must be executed slowly. + """ + sql_sorts = [] + for sort in reversed(self.sorts): + if not sort.order_clause() is None: + sql_sorts.append(sort) + else: + break + sql_sorts.reverse() + return sql_sorts + + def order_clause(self): + order_strings = [] + for sort in self._sql_sorts(): + order = sort.order_clause() + order_strings.append(order) + + return ", ".join(order_strings) + + def is_slow(self): + for sort in self.sorts: + if sort.is_slow(): + return True + return False + + def sort(self, items): + slow_sorts = [] + switch_slow = False + for sort in reversed(self.sorts): + if switch_slow: + slow_sorts.append(sort) + elif sort.order_clause() is None: + switch_slow = True + slow_sorts.append(sort) + else: + pass + + for sort in slow_sorts: + items = sort.sort(items) + return items + + def __repr__(self): + return u'MultipleSort({0})'.format(repr(self.sorts)) + + +class FieldSort(Sort): + """An abstract sort criterion that orders by a specific field (of + any kind). + """ + def __init__(self, field, ascending=True): + self.field = field + self.ascending = ascending + + def sort(self, objs): + # TODO: Conversion and null-detection here. In Python 3, + # comparisons with None fail. We should also support flexible + # attributes with different types without falling over. 
+        return sorted(objs, key=attrgetter(self.field),
+                      reverse=not self.ascending)
+
+    def __repr__(self):
+        return u'<{0}: {1}{2}>'.format(
+            type(self).__name__,
+            self.field,
+            '+' if self.ascending else '-',
+        )
+
+
+class FixedFieldSort(FieldSort):
+    """Sort object to sort on a fixed field.
+    """
+    def order_clause(self):
+        order = "ASC" if self.ascending else "DESC"
+        return "{0} {1}".format(self.field, order)
+
+
+class SlowFieldSort(FieldSort):
+    """A sort criterion by some model field other than a fixed field:
+    i.e., a computed or flexible field.
+    """
+    def is_slow(self):
+        return True
+
+
+class NullSort(Sort):
+    """No sorting. Leave results unsorted."""
+    def sort(self, items):
+        return items
diff --git a/lib/beets/dbcore/queryparse.py b/lib/beets/dbcore/queryparse.py
new file mode 100644
index 00000000..90963696
--- /dev/null
+++ b/lib/beets/dbcore/queryparse.py
@@ -0,0 +1,180 @@
+# This file is part of beets.
+# Copyright 2014, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Parsing of strings into DBCore queries.
+"""
+import re
+import itertools
+from . import query
+
+
+PARSE_QUERY_PART_REGEX = re.compile(
+    # Non-capturing optional segment for the keyword.
+    r'(?:'
+    r'(\S+?)'    # The field key.
+    r'(?<!\\):'  # Unescaped :
+    r')?'
+
+    r'(.*)',         # The term itself.
+
+    re.I  # Case-insensitive.
+)
+
+
+def parse_query_part(part, query_classes={}, prefixes={},
+                     default_class=query.SubstringQuery):
+    """Take a query in the form of a key/value pair separated by a
+    colon and return a tuple `(key, term, query_class)`. `key` may be
+    None, indicating that any field may be matched. `query_class` is
+    the `FieldQuery` subclass used to match `term`.
+
+    `query_classes` maps field names to default query types for those
+    fields; `default_class` is the fallback when neither a prefix nor a
+    field-specific type applies. `prefixes` maps query prefix strings
+    (for example, a leading colon for regular expressions) to query
+    types; a matching prefix takes precedence over the type-based
+    choice:
+
+    'stapler' -> (None, 'stapler', SubstringQuery)
+    'color:red' -> ('color', 'red', SubstringQuery)
+    ':^Quiet' -> (None, '^Quiet', RegexpQuery)
+    'color::b..e' -> ('color', 'b..e', RegexpQuery)
+
+    Prefixes may be "escaped" with a backslash to disable the keying
+    behavior.
+    """
+    part = part.strip()
+    match = PARSE_QUERY_PART_REGEX.match(part)
+
+    assert match  # Regex should always match.
+    key = match.group(1)
+    term = match.group(2).replace('\:', ':')
+
+    # Match the search term against the list of prefixes.
+    for pre, query_class in prefixes.items():
+        if term.startswith(pre):
+            return key, term[len(pre):], query_class
+
+    # No matching prefix: use type-based or fallback/default query.
+    query_class = query_classes.get(key, default_class)
+    return key, term, query_class
+
+
+def construct_query_part(model_cls, prefixes, query_part):
+    """Create a query from a single query component, `query_part`, for
+    querying instances of `model_cls`. Return a `Query` instance.
+    """
+    # Shortcut for empty query parts.
+    if not query_part:
+        return query.TrueQuery()
+
+    # Get the query classes for each possible field.
+    query_classes = {}
+    for k, t in itertools.chain(model_cls._fields.items(),
+                                model_cls._types.items()):
+        query_classes[k] = t.query
+
+    # Parse the string.
+    key, pattern, query_class = \
+        parse_query_part(query_part, query_classes, prefixes)
+
+    # No key specified.
+    if key is None:
+        if issubclass(query_class, query.FieldQuery):
+            # The query type matches a specific field, but none was
+            # specified. So we use a version of the query that matches
+            # any field.
+            return query.AnyFieldQuery(pattern, model_cls._search_fields,
+                                       query_class)
+        else:
+            # Other query type.
+ return query_class(pattern) + + key = key.lower() + return query_class(key.lower(), pattern, key in model_cls._fields) + + +def query_from_strings(query_cls, model_cls, prefixes, query_parts): + """Creates a collection query of type `query_cls` from a list of + strings in the format used by parse_query_part. `model_cls` + determines how queries are constructed from strings. + """ + subqueries = [] + for part in query_parts: + subqueries.append(construct_query_part(model_cls, prefixes, part)) + if not subqueries: # No terms in query. + subqueries = [query.TrueQuery()] + return query_cls(subqueries) + + +def construct_sort_part(model_cls, part): + """Create a `Sort` from a single string criterion. + + `model_cls` is the `Model` being queried. `part` is a single string + ending in ``+`` or ``-`` indicating the sort. + """ + assert part, "part must be a field name and + or -" + field = part[:-1] + assert field, "field is missing" + direction = part[-1] + assert direction in ('+', '-'), "part must end with + or -" + is_ascending = direction == '+' + + if field in model_cls._sorts: + sort = model_cls._sorts[field](model_cls, is_ascending) + elif field in model_cls._fields: + sort = query.FixedFieldSort(field, is_ascending) + else: + # Flexible or computed. + sort = query.SlowFieldSort(field, is_ascending) + return sort + + +def sort_from_strings(model_cls, sort_parts): + """Create a `Sort` from a list of sort criteria (strings). + """ + if not sort_parts: + return query.NullSort() + else: + sort = query.MultipleSort() + for part in sort_parts: + sort.add_sort(construct_sort_part(model_cls, part)) + return sort + + +def parse_sorted_query(model_cls, parts, prefixes={}, + query_cls=query.AndQuery): + """Given a list of strings, create the `Query` and `Sort` that they + represent. + """ + # Separate query token and sort token. + query_parts = [] + sort_parts = [] + for part in parts: + if part.endswith((u'+', u'-')) and u':' not in part: + sort_parts.append(part) + else: + query_parts.append(part) + + # Parse each. + q = query_from_strings( + query_cls, model_cls, prefixes, query_parts + ) + s = sort_from_strings(model_cls, sort_parts) + return q, s diff --git a/lib/beets/dbcore/types.py b/lib/beets/dbcore/types.py index 165c0b60..82346e70 100644 --- a/lib/beets/dbcore/types.py +++ b/lib/beets/dbcore/types.py @@ -18,55 +18,111 @@ from . import query from beets.util import str2bool - # Abstract base. - class Type(object): """An object encapsulating the type of a model field. Includes - information about how to store the value in the database, query, - format, and parse a given field. + information about how to store, query, format, and parse a given + field. """ - sql = None + sql = u'TEXT' """The SQLite column type for the value. """ - query = None + query = query.SubstringQuery """The `Query` subclass to be used when querying the field. """ + model_type = unicode + """The Python type that is used to represent the value in the model. + + The model is guaranteed to return a value of this type if the field + is accessed. To this end, the constructor is used by the `normalize` + and `from_sql` methods and the `default` property. + """ + + @property + def null(self): + """The value to be exposed when the underlying value is None. + """ + return self.model_type() + def format(self, value): """Given a value of this type, produce a Unicode string representing the value. This is used in template evaluation. 
""" - raise NotImplementedError() + if value is None: + value = self.null + # `self.null` might be `None` + if value is None: + value = u'' + if isinstance(value, bytes): + value = value.decode('utf8', 'ignore') + + return unicode(value) def parse(self, string): """Parse a (possibly human-written) string and return the indicated value of this type. """ - raise NotImplementedError() + try: + return self.model_type(string) + except ValueError: + return self.null + def normalize(self, value): + """Given a value that will be assigned into a field of this + type, normalize the value to have the appropriate type. This + base implementation only reinterprets `None`. + """ + if value is None: + return self.null + else: + # TODO This should eventually be replaced by + # `self.model_type(value)` + return value + + def from_sql(self, sql_value): + """Receives the value stored in the SQL backend and return the + value to be stored in the model. + + For fixed fields the type of `value` is determined by the column + type affinity given in the `sql` property and the SQL to Python + mapping of the database adapter. For more information see: + http://www.sqlite.org/datatype3.html + https://docs.python.org/2/library/sqlite3.html#sqlite-and-python-types + + Flexible fields have the type afinity `TEXT`. This means the + `sql_value` is either a `buffer` or a `unicode` object` and the + method must handle these in addition. + """ + if isinstance(sql_value, buffer): + sql_value = bytes(sql_value).decode('utf8', 'ignore') + if isinstance(sql_value, unicode): + return self.parse(sql_value) + else: + return self.normalize(sql_value) + + def to_sql(self, model_value): + """Convert a value as stored in the model object to a value used + by the database adapter. + """ + return model_value # Reusable types. +class Default(Type): + null = None + class Integer(Type): """A basic integer type. """ sql = u'INTEGER' query = query.NumericQuery - - def format(self, value): - return unicode(value or 0) - - def parse(self, string): - try: - return int(string) - except ValueError: - return 0 + model_type = int class PaddedInt(Integer): @@ -93,9 +149,14 @@ class ScaledInt(Integer): class Id(Integer): - """An integer used as the row key for a SQLite table. + """An integer used as the row id or a foreign key in a SQLite table. + This type is nullable: None values are not translated to zero. """ - sql = u'INTEGER PRIMARY KEY' + null = None + + def __init__(self, primary=True): + if primary: + self.sql = u'INTEGER PRIMARY KEY' class Float(Type): @@ -103,15 +164,16 @@ class Float(Type): """ sql = u'REAL' query = query.NumericQuery + model_type = float def format(self, value): return u'{0:.1f}'.format(value or 0.0) - def parse(self, string): - try: - return float(string) - except ValueError: - return 0.0 + +class NullFloat(Float): + """Same as `Float`, but does not normalize `None` to `0.0`. + """ + null = None class String(Type): @@ -120,21 +182,27 @@ class String(Type): sql = u'TEXT' query = query.SubstringQuery - def format(self, value): - return unicode(value) if value else u'' - - def parse(self, string): - return string - class Boolean(Type): """A boolean type. """ sql = u'INTEGER' query = query.BooleanQuery + model_type = bool def format(self, value): return unicode(bool(value)) def parse(self, string): return str2bool(string) + + +# Shared instances of common types. 
+DEFAULT = Default() +INTEGER = Integer() +PRIMARY_ID = Id(True) +FOREIGN_ID = Id(False) +FLOAT = Float() +NULL_FLOAT = NullFloat() +STRING = String() +BOOLEAN = Boolean() diff --git a/lib/beets/importer.py b/lib/beets/importer.py index f997770c..4a7bd997 100644 --- a/lib/beets/importer.py +++ b/lib/beets/importer.py @@ -18,10 +18,15 @@ autotagging music files. from __future__ import print_function import os +import re import logging import pickle import itertools from collections import defaultdict +from tempfile import mkdtemp +from bisect import insort, bisect_left +from contextlib import contextmanager +import shutil from beets import autotag from beets import library @@ -29,23 +34,25 @@ from beets import dbcore from beets import plugins from beets import util from beets import config -from beets.util import pipeline +from beets.util import pipeline, sorted_walk, ancestry from beets.util import syspath, normpath, displayable_path -from beets.util.enumeration import enum +from enum import Enum from beets import mediafile -action = enum( - 'SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID', - 'ALBUMS', name='action' -) +action = Enum('action', + ['SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID', + 'ALBUMS']) QUEUE_SIZE = 128 SINGLE_ARTIST_THRESH = 0.25 VARIOUS_ARTISTS = u'Various Artists' +PROGRESS_KEY = 'tagprogress' +HISTORY_KEY = 'taghistory' # Global logger. log = logging.getLogger('beets') + class ImportAbort(Exception): """Raised when the user aborts the tagging operation. """ @@ -54,151 +61,91 @@ class ImportAbort(Exception): # Utilities. -def _duplicate_check(lib, task): - """Check whether an album already exists in the library. Returns a - list of Album objects (empty if no duplicates are found). - """ - assert task.choice_flag in (action.ASIS, action.APPLY) - artist, album = task.chosen_ident() - - if artist is None: - # As-is import with no artist. Skip check. - return [] - - found_albums = [] - cur_paths = set(i.path for i in task.items if i) - for album_cand in lib.albums(dbcore.MatchQuery('albumartist', artist)): - if album_cand.album == album: - # Check whether the album is identical in contents, in which - # case it is not a duplicate (will be replaced). - other_paths = set(i.path for i in album_cand.items()) - if other_paths == cur_paths: - continue - found_albums.append(album_cand) - return found_albums - -def _item_duplicate_check(lib, task): - """Check whether an item already exists in the library. Returns a - list of Item objects. - """ - assert task.choice_flag in (action.ASIS, action.APPLY) - artist, title = task.chosen_ident() - - found_items = [] - query = dbcore.AndQuery(( - dbcore.MatchQuery('artist', artist), - dbcore.MatchQuery('title', title), - )) - for other_item in lib.items(query): - # Existing items not considered duplicates. - if other_item.path == task.item.path: - continue - found_items.append(other_item) - return found_items - -def _infer_album_fields(task): - """Given an album and an associated import task, massage the - album-level metadata. This ensures that the album artist is set - and that the "compilation" flag is set automatically. - """ - assert task.is_album - assert task.items - - changes = {} - - if task.choice_flag == action.ASIS: - # Taking metadata "as-is". Guess whether this album is VA. - plur_albumartist, freq = util.plurality( - [i.albumartist or i.artist for i in task.items]) - if freq == len(task.items) or (freq > 1 and - float(freq) / len(task.items) >= SINGLE_ARTIST_THRESH): - # Single-artist album. 
- changes['albumartist'] = plur_albumartist - changes['comp'] = False - else: - # VA. - changes['albumartist'] = VARIOUS_ARTISTS - changes['comp'] = True - - elif task.choice_flag == action.APPLY: - # Applying autotagged metadata. Just get AA from the first - # item. - for item in task.items: - if item is not None: - first_item = item - break - else: - assert False, "all items are None" - if not first_item.albumartist: - changes['albumartist'] = first_item.artist - if not first_item.mb_albumartistid: - changes['mb_albumartistid'] = first_item.mb_artistid - - else: - assert False - - # Apply new metadata. - for item in task.items: - if item is not None: - for k, v in changes.iteritems(): - setattr(item, k, v) - -def _resume(): - """Check whether an import should resume and return a boolean or the - string 'ask' indicating that the user should be queried. - """ - return config['import']['resume'].as_choice([True, False, 'ask']) - def _open_state(): """Reads the state file, returning a dictionary.""" try: with open(config['statefile'].as_filename()) as f: return pickle.load(f) - except (IOError, EOFError): + except Exception as exc: + # The `pickle` module can emit all sorts of exceptions during + # unpickling, including ImportError. We use a catch-all + # exception to avoid enumerating them all (the docs don't even have a + # full list!). + log.debug(u'state file could not be read: {0}'.format(exc)) return {} + + def _save_state(state): """Writes the state dictionary out to disk.""" try: with open(config['statefile'].as_filename(), 'w') as f: pickle.dump(state, f) except IOError as exc: - log.error(u'state file could not be written: %s' % unicode(exc)) + log.error(u'state file could not be written: {0}'.format(exc)) # Utilities for reading and writing the beets progress file, which # allows long tagging tasks to be resumed when they pause (or crash). -PROGRESS_KEY = 'tagprogress' -def progress_set(toppath, paths): - """Record that tagging for the given `toppath` was successful up to - `paths`. If paths is None, then clear the progress value (indicating - that the tagging completed). - """ + +def progress_read(): state = _open_state() - if PROGRESS_KEY not in state: - state[PROGRESS_KEY] = {} + return state.setdefault(PROGRESS_KEY, {}) - if paths is None: - # Remove progress from file. - if toppath in state[PROGRESS_KEY]: - del state[PROGRESS_KEY][toppath] - else: - state[PROGRESS_KEY][toppath] = paths +@contextmanager +def progress_write(): + state = _open_state() + progress = state.setdefault(PROGRESS_KEY, {}) + yield progress _save_state(state) -def progress_get(toppath): - """Get the last successfully tagged subpath of toppath. If toppath - has no progress information, returns None. + + +def progress_add(toppath, *paths): + """Record that the files under all of the `paths` have been imported + under `toppath`. """ - state = _open_state() - if PROGRESS_KEY not in state: - return None - return state[PROGRESS_KEY].get(toppath) + with progress_write() as state: + imported = state.setdefault(toppath, []) + for path in paths: + # Normally `progress_add` will be called with the path + # argument increasing. This is because of the ordering in + # `albums_in_dir`. We take advantage of that to make the + # code faster + if imported and imported[len(imported) - 1] <= path: + imported.append(path) + else: + insort(imported, path) + + +def progress_element(toppath, path): + """Return whether `path` has been imported in `toppath`. 
+ """ + state = progress_read() + if toppath not in state: + return False + imported = state[toppath] + i = bisect_left(imported, path) + return i != len(imported) and imported[i] == path + + +def has_progress(toppath): + """Return `True` if there exist paths that have already been + imported under `toppath`. + """ + state = progress_read() + return toppath in state + + +def progress_reset(toppath): + with progress_write() as state: + if toppath in state: + del state[toppath] # Similarly, utilities for manipulating the "incremental" import log. # This keeps track of all directories that were ever imported, which # allows the importer to only import new stuff. -HISTORY_KEY = 'taghistory' + def history_add(paths): """Indicate that the import of the album in `paths` is completed and should not be repeated in incremental imports. @@ -210,6 +157,8 @@ def history_add(paths): state[HISTORY_KEY].add(tuple(paths)) _save_state(state) + + def history_get(): """Get the set of completed path tuples in incremental imports. """ @@ -235,17 +184,21 @@ class ImportSession(object): self.logfile = logfile self.paths = paths self.query = query + self.seen_idents = set() + self._is_resuming = dict() # Normalize the paths. if self.paths: self.paths = map(normpath, self.paths) - def _amend_config(self): - """Make implied changes the importer configuration. + def set_config(self, config): + """Set `config` property from global import config and make + implied changes. """ # FIXME: Maybe this function should not exist and should instead # provide "decision wrappers" like "should_resume()", etc. - iconfig = config['import'] + iconfig = dict(config) + self.config = iconfig # Incremental and progress are mutually exclusive. if iconfig['incremental']: @@ -257,14 +210,20 @@ class ImportSession(object): iconfig['resume'] = False iconfig['incremental'] = False - # Copy and move are mutually exclusive. + # Copy, move, and link are mutually exclusive. if iconfig['move']: iconfig['copy'] = False + iconfig['link'] = False + elif iconfig['link']: + iconfig['copy'] = False + iconfig['move'] = False # Only delete when copying. if not iconfig['copy']: iconfig['delete'] = False + self.want_resume = config['resume'].as_choice([True, False, 'ask']) + def tag_log(self, status, paths): """Log a message about a given album to logfile. The status should reflect the reason the album couldn't be tagged. @@ -279,10 +238,10 @@ class ImportSession(object): ``duplicate``, then this is a secondary choice after a duplicate was detected and a decision was made. """ - paths = task.paths if task.is_album else [task.item.path] + paths = task.paths if duplicate: # Duplicate: log all three choices (skip, keep both, and trump). - if task.remove_duplicates: + if task.should_remove_duplicates: self.tag_log('duplicate-replace', paths) elif task.choice_flag in (action.ASIS, action.APPLY): self.tag_log('duplicate-keep', paths) @@ -301,7 +260,7 @@ class ImportSession(object): def choose_match(self, task): raise NotImplementedError - def resolve_duplicate(self, task): + def resolve_duplicate(self, task, found_duplicates): raise NotImplementedError def choose_item(self, task): @@ -310,38 +269,37 @@ class ImportSession(object): def run(self): """Run the import task. """ - self._amend_config() + self.set_config(config['import']) # Set up the pipeline. if self.query is None: stages = [read_tasks(self)] else: stages = [query_tasks(self)] - if config['import']['singletons']: - # Singleton importer. 
- if config['import']['autotag']: - stages += [item_lookup(self), item_query(self)] - else: - stages += [item_progress(self)] + + if self.config['pretend']: + # Only log the imported files and end the pipeline + stages += [log_files(self)] else: - # Whole-album importer. - if config['import']['group_albums']: + if self.config['group_albums'] and \ + not self.config['singletons']: # Split directory tasks into one task for each album stages += [group_albums(self)] - if config['import']['autotag']: - # Only look up and query the user when autotagging. - stages += [initial_lookup(self), user_query(self)] + if self.config['autotag']: + # FIXME We should also resolve duplicates when not + # autotagging. This is currently handled in `user_query` + stages += [lookup_candidates(self), user_query(self)] else: - # When not autotagging, just display progress. - stages += [show_progress(self)] - stages += [apply_choices(self)] - for stage_func in plugins.import_stages(): - stages.append(plugin_stage(self, stage_func)) - stages += [manipulate_files(self)] - stages += [finalize(self)] + stages += [import_asis(self)] + stages += [apply_choices(self)] + + for stage_func in plugins.import_stages(): + stages.append(plugin_stage(self, stage_func)) + stages += [manipulate_files(self)] pl = pipeline.Pipeline(stages) # Run the pipeline. + plugins.send('import_begin', session=self) try: if config['threaded']: pl.run_parallel(QUEUE_SIZE) @@ -351,80 +309,100 @@ class ImportSession(object): # User aborted operation. Silently stop. pass + # Incremental and resumed imports + + def already_imported(self, toppath, paths): + """Returns true if the files belonging to this task have already + been imported in a previous session. + """ + if self.is_resuming(toppath) \ + and all(map(lambda p: progress_element(toppath, p), paths)): + return True + if self.config['incremental'] \ + and tuple(paths) in self.history_dirs: + return True + + return False + + @property + def history_dirs(self): + if not hasattr(self, '_history_dirs'): + self._history_dirs = history_get() + return self._history_dirs + + def is_resuming(self, toppath): + """Return `True` if user wants to resume import of this path. + + You have to call `ask_resume` first to determine the return value. + """ + return self._is_resuming.get(toppath, False) + + def ask_resume(self, toppath): + """If import of `toppath` was aborted in an earlier session, ask + user if she wants to resume the import. + + Determines the return value of `is_resuming(toppath)`. + """ + if self.want_resume and has_progress(toppath): + # Either accept immediately or prompt for input to decide. + if self.want_resume is True or \ + self.should_resume(toppath): + log.warn(u'Resuming interrupted import of {0}'.format( + util.displayable_path(toppath))) + self._is_resuming[toppath] = True + else: + # Clear progress; we're starting from the top. + progress_reset(toppath) + # The importer task class. class ImportTask(object): """Represents a single set of items to be imported along with its intermediate state. May represent an album or a single item. + + The import session and stages call the following methods in the + given order. + + * `lookup_candidates()` Sets the `common_artist`, `common_album`, + `candidates`, and `rec` attributes. `candidates` is a list of + `AlbumMatch` objects. + + * `choose_match()` Uses the session to set the `match` attribute + from the `candidates` list. + + * `find_duplicates()` Returns a list of albums from `lib` with the + same artist and album name as the task. 
+ + * `apply_metadata()` Sets the attributes of the items from the + task's `match` attribute. + + * `add()` Add the imported items and album to the database. + + * `manipulate_files()` Copy, move, and write files depending on the + session configuration. + + * `finalize()` Update the import progress and cleanup the file + system. """ def __init__(self, toppath=None, paths=None, items=None): self.toppath = toppath self.paths = paths self.items = items - self.sentinel = False - self.remove_duplicates = False - self.is_album = True self.choice_flag = None - @classmethod - def done_sentinel(cls, toppath): - """Create an ImportTask that indicates the end of a top-level - directory import. - """ - obj = cls(toppath) - obj.sentinel = True - return obj - - @classmethod - def progress_sentinel(cls, toppath, paths): - """Create a task indicating that a single directory in a larger - import has finished. This is only required for singleton - imports; progress is implied for album imports. - """ - obj = cls(toppath, paths) - obj.sentinel = True - return obj - - @classmethod - def item_task(cls, item): - """Creates an ImportTask for a single item.""" - obj = cls() - obj.item = item - obj.is_album = False - return obj - - def set_candidates(self, cur_artist, cur_album, candidates, rec): - """Sets the candidates for this album matched by the - `autotag.tag_album` method. - """ - assert self.is_album - assert not self.sentinel - self.cur_artist = cur_artist - self.cur_album = cur_album - self.candidates = candidates - self.rec = rec - - def set_null_candidates(self): - """Set the candidates to indicate no album match was found. - """ - self.cur_artist = None self.cur_album = None - self.candidates = None + self.cur_artist = None + self.candidates = [] self.rec = None - - def set_item_candidates(self, candidates, rec): - """Set the match for a single-item task.""" - assert not self.is_album - assert self.item is not None - self.candidates = candidates - self.rec = rec + # TODO remove this eventually + self.should_remove_duplicates = False + self.is_album = True def set_choice(self, choice): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. """ - assert not self.sentinel # Not part of the task structure: assert choice not in (action.MANUAL, action.MANUAL_ID) assert choice != action.APPLY # Only used internally. @@ -432,10 +410,6 @@ class ImportTask(object): self.choice_flag = choice self.match = None else: - if self.is_album: - assert isinstance(choice, autotag.AlbumMatch) - else: - assert isinstance(choice, autotag.TrackMatch) self.choice_flag = action.APPLY # Implicit choice. self.match = choice @@ -443,38 +417,24 @@ class ImportTask(object): """Updates the progress state to indicate that this album has finished. """ - if self.sentinel and self.paths is None: - # "Done" sentinel. - progress_set(self.toppath, None) - elif self.sentinel or self.is_album: - # "Directory progress" sentinel for singletons or a real - # album task, which implies the same. - progress_set(self.toppath, self.paths) + if self.toppath: + progress_add(self.toppath, *self.paths) def save_history(self): """Save the directory in the history for incremental imports. """ - if self.is_album and self.paths and not self.sentinel: + if self.paths: history_add(self.paths) - # Logical decisions. 
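For orientation, the method order described in the class docstring above looks roughly like the following when written as one straight-line function. In the real importer these calls are spread across the pipeline stages defined later in this file; `task`, `session`, and `lib` are assumed to behave like the classes in this module, and the helper name is made up.

def run_album_task(task, session, lib):
    task.lookup_candidates()         # fill candidates/rec via the autotagger
    task.choose_match(session)       # the session picks a match (or ASIS/SKIP)

    if not task.skip:
        duplicates = task.find_duplicates(lib)
        if duplicates:
            # The session may flip the task to SKIP or mark the old album
            # for removal here.
            session.resolve_duplicate(task, duplicates)

    if not task.skip:
        if task.apply:
            task.apply_metadata()    # copy matched metadata onto the items
        task.add(lib)                # store album + items, replacing old rows
        task.manipulate_files(
            move=session.config['move'],
            copy=session.config['copy'],
            write=session.config['write'],
            link=session.config['link'],
            session=session,
        )

    task.finalize(session)           # progress, history, cleanup, plugin event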
- def should_write_tags(self): - """Should new info be written to the files' metadata?""" - if self.choice_flag == action.APPLY: - return True - elif self.choice_flag in (action.ASIS, action.TRACKS, action.SKIP): - return False - else: - assert False - - def should_skip(self): - """After a choice has been made, returns True if this is a - sentinel or it has been marked for skipping. - """ - return self.sentinel or self.choice_flag == action.SKIP + @property + def apply(self): + return self.choice_flag == action.APPLY + @property + def skip(self): + return self.choice_flag == action.SKIP # Convenient data. @@ -485,34 +445,306 @@ class ImportTask(object): (in which case the data comes from the files' current metadata) or APPLY (data comes from the choice). """ - assert self.choice_flag in (action.ASIS, action.APPLY) - if self.is_album: - if self.choice_flag is action.ASIS: - return (self.cur_artist, self.cur_album) - elif self.choice_flag is action.APPLY: - return (self.match.info.artist, self.match.info.album) - else: - if self.choice_flag is action.ASIS: - return (self.item.artist, self.item.title) - elif self.choice_flag is action.APPLY: - return (self.match.info.artist, self.match.info.title) + if self.choice_flag is action.ASIS: + return (self.cur_artist, self.cur_album) + elif self.choice_flag is action.APPLY: + return (self.match.info.artist, self.match.info.album) def imported_items(self): """Return a list of Items that should be added to the library. - If this is an album task, return the list of items in the - selected match or everything if the choice is ASIS. If this is a - singleton task, return a list containing the item. + + If the tasks applies an album match the method only returns the + matched items. + """ + if self.choice_flag == action.ASIS: + return list(self.items) + # FIXME this should be a simple attribute. There should be no + # need to retrieve the keys of `match.mapping`. This requires + # that we remove unmatched items from the list. + elif self.choice_flag == action.APPLY: + return self.match.mapping.keys() + else: + assert False + + def apply_metadata(self): + """Copy metadata from match info to the items. + """ + # TODO call should be more descriptive like + # apply_metadata(self.match, self.items) + autotag.apply_metadata(self.match.info, self.match.mapping) + + def duplicate_items(self, lib): + duplicate_items = [] + for album in self.find_duplicates(lib): + duplicate_items += album.items() + return duplicate_items + + def remove_duplicates(self, lib): + duplicate_items = self.duplicate_items(lib) + log.debug(u'removing {0} old duplicated items' + .format(len(duplicate_items))) + for item in duplicate_items: + item.remove() + if lib.directory in util.ancestry(item.path): + log.debug(u'deleting duplicate {0}' + .format(util.displayable_path(item.path))) + util.remove(item.path) + util.prune_dirs(os.path.dirname(item.path), + lib.directory) + + def finalize(self, session): + """Save progress, clean up files, and emit plugin event. + """ + # FIXME the session argument is unfortunate. It should be + # present as an attribute of the task. + + # Update progress. + if session.want_resume: + self.save_progress() + if session.config['incremental']: + self.save_history() + + self.cleanup(copy=session.config['copy'], + delete=session.config['delete'], + move=session.config['move']) + + if not self.skip: + self._emit_imported(session.lib) + + def cleanup(self, copy=False, delete=False, move=False): + """Remove and prune imported paths. 
+ """ + # FIXME Maybe the keywords should be task properties. + + # Do not delete any files or prune directories when skipping. + if self.skip: + return + + items = self.imported_items() + + # When copying and deleting originals, delete old files. + if copy and delete: + new_paths = [os.path.realpath(item.path) for item in items] + for old_path in self.old_paths: + # Only delete files that were actually copied. + if old_path not in new_paths: + util.remove(syspath(old_path), False) + self.prune(old_path) + + # When moving, prune empty directories containing the original files. + elif move: + for old_path in self.old_paths: + self.prune(old_path) + + def _emit_imported(self, lib): + # FIXME This shouldn't be here. Skipping should be handled in + # the stages. + if self.skip: + return + plugins.send('album_imported', lib=lib, album=self.album) + + def lookup_candidates(self): + """Retrieve and store candidates for this album. + """ + artist, album, candidates, recommendation = \ + autotag.tag_album(self.items) + self.cur_artist = artist + self.cur_album = album + self.candidates = candidates + self.rec = recommendation + + def find_duplicates(self, lib): + """Return a list of albums from `lib` with the same artist and + album name as the task. + """ + artist, album = self.chosen_ident() + + if artist is None: + # As-is import with no artist. Skip check. + return [] + + duplicates = [] + task_paths = set(i.path for i in self.items if i) + duplicate_query = dbcore.AndQuery(( + dbcore.MatchQuery('albumartist', artist), + dbcore.MatchQuery('album', album), + )) + + for album in lib.albums(duplicate_query): + # Check whether the album is identical in contents, in which + # case it is not a duplicate (will be replaced). + album_paths = set(i.path for i in album.items()) + if album_paths != task_paths: + duplicates.append(album) + return duplicates + + def align_album_level_fields(self): + """Make the some album fields equal across `self.items` + """ + changes = {} + + if self.choice_flag == action.ASIS: + # Taking metadata "as-is". Guess whether this album is VA. + plur_albumartist, freq = util.plurality( + [i.albumartist or i.artist for i in self.items] + ) + if freq == len(self.items) or \ + (freq > 1 and + float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH): + # Single-artist album. + changes['albumartist'] = plur_albumartist + changes['comp'] = False + else: + # VA. + changes['albumartist'] = VARIOUS_ARTISTS + changes['comp'] = True + + elif self.choice_flag == action.APPLY: + # Applying autotagged metadata. Just get AA from the first + # item. + if not self.items[0].albumartist: + changes['albumartist'] = self.items[0].artist + if not self.items[0].mb_albumartistid: + changes['mb_albumartistid'] = self.items[0].mb_artistid + + # Apply new metadata. + for item in self.items: + item.update(changes) + + def manipulate_files(self, move=False, copy=False, write=False, + link=False, session=None): + items = self.imported_items() + # Save the original paths of all items for deletion and pruning + # in the next step (finalization). + self.old_paths = [item.path for item in items] + for item in items: + if move or copy or link: + # In copy and link modes, treat re-imports specially: + # move in-library files. (Out-of-library files are + # copied/moved as usual). + old_path = item.path + if (copy or link) and self.replaced_items[item] and \ + session.lib.directory in util.ancestry(old_path): + item.move() + # We moved the item, so remove the + # now-nonexistent file from old_paths. 
+ self.old_paths.remove(old_path) + else: + # A normal import. Just copy files and keep track of + # old paths. + item.move(copy, link) + + if write and self.apply: + item.try_write() + + with session.lib.transaction(): + for item in self.imported_items(): + item.store() + + plugins.send('import_task_files', session=session, task=self) + + def add(self, lib): + """Add the items as an album to the library and remove replaced items. + """ + self.align_album_level_fields() + with lib.transaction(): + self.record_replaced(lib) + self.remove_replaced(lib) + self.album = lib.add_album(self.imported_items()) + self.reimport_metadata(lib) + + def record_replaced(self, lib): + """Records the replaced items and albums in the `replaced_items` + and `replaced_albums` dictionaries. + """ + self.replaced_items = defaultdict(list) + self.replaced_albums = defaultdict(list) + replaced_album_ids = set() + for item in self.imported_items(): + dup_items = list(lib.items( + dbcore.query.BytesQuery('path', item.path) + )) + self.replaced_items[item] = dup_items + for dup_item in dup_items: + if (not dup_item.album_id or + dup_item.album_id in replaced_album_ids): + continue + replaced_album = dup_item.get_album() + if replaced_album: + replaced_album_ids.add(dup_item.album_id) + self.replaced_albums[replaced_album.path] = replaced_album + + def reimport_metadata(self, lib): + """For reimports, preserves metadata for reimported items and + albums. """ if self.is_album: - if self.choice_flag == action.ASIS: - return list(self.items) - elif self.choice_flag == action.APPLY: - return self.match.mapping.keys() - else: - assert False - else: - return [self.item] + replaced_album = self.replaced_albums.get(self.album.path) + if replaced_album: + self.album.added = replaced_album.added + self.album.update(replaced_album._values_flex) + self.album.store() + log.debug( + u'Reimported album: added {0}, flexible ' + u'attributes {1} from album {2} for {3}'.format( + self.album.added, + replaced_album._values_flex.keys(), + replaced_album.id, + displayable_path(self.album.path), + ) + ) + for item in self.imported_items(): + dup_items = self.replaced_items[item] + for dup_item in dup_items: + if dup_item.added and dup_item.added != item.added: + item.added = dup_item.added + log.debug( + u'Reimported item added {0} ' + u'from item {1} for {2}'.format( + item.added, + dup_item.id, + displayable_path(item.path), + ) + ) + item.update(dup_item._values_flex) + log.debug( + u'Reimported item flexible attributes {0} ' + u'from item {1} for {2}'.format( + dup_item._values_flex.keys(), + dup_item.id, + displayable_path(item.path), + ) + ) + item.store() + + def remove_replaced(self, lib): + """Removes all the items from the library that have the same + path as an item from this task. + """ + for item in self.imported_items(): + for dup_item in self.replaced_items[item]: + log.debug(u'Replacing item {0}: {1}' + .format(dup_item.id, + displayable_path(item.path))) + dup_item.remove() + log.debug(u'{0} of {1} items replaced' + .format(sum(bool(l) for l in self.replaced_items.values()), + len(self.imported_items()))) + + def choose_match(self, session): + """Ask the session which match should apply and apply it. + """ + choice = session.choose_match(self) + self.set_choice(choice) + session.log_choice(self) + + def reload(self): + """Reload albums and items from the database. + """ + for item in self.imported_items(): + item.load() + self.album.load() # Utilities. 
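The session-facing hooks used above (`choose_match`, `choose_item`, `resolve_duplicate`, `should_resume`) are all that a concrete `ImportSession` has to provide. A minimal non-interactive subclass might look like this sketch; the class name and its always-skip duplicate policy are invented for the example.

from beets.importer import ImportSession, action


class QuietSession(ImportSession):
    """Accept the best candidate for every task and never prompt."""

    def choose_match(self, task):
        return task.candidates[0] if task.candidates else action.SKIP

    def choose_item(self, task):
        return task.candidates[0] if task.candidates else action.SKIP

    def resolve_duplicate(self, task, found_duplicates):
        # Keep what is already in the library; skip the incoming copy.
        task.set_choice(action.SKIP)

    def should_resume(self, path):
        return True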
@@ -529,6 +761,310 @@
                               clutter=config['clutter'].as_str_seq())
+
+
+class SingletonImportTask(ImportTask):
+    """ImportTask for a single track that is not associated to an album.
+    """
+
+    def __init__(self, toppath, item):
+        super(SingletonImportTask, self).__init__(toppath, [item.path])
+        self.item = item
+        self.is_album = False
+        self.paths = [item.path]
+
+    def chosen_ident(self):
+        assert self.choice_flag in (action.ASIS, action.APPLY)
+        if self.choice_flag is action.ASIS:
+            return (self.item.artist, self.item.title)
+        elif self.choice_flag is action.APPLY:
+            return (self.match.info.artist, self.match.info.title)
+
+    def imported_items(self):
+        return [self.item]
+
+    def apply_metadata(self):
+        autotag.apply_item_metadata(self.item, self.match.info)
+
+    def _emit_imported(self, lib):
+        # FIXME This shouldn't be here. Skipped tasks should be removed from
+        # the pipeline.
+        if self.skip:
+            return
+        for item in self.imported_items():
+            plugins.send('item_imported', lib=lib, item=item)
+
+    def lookup_candidates(self):
+        candidates, recommendation = autotag.tag_item(self.item)
+        self.candidates = candidates
+        self.rec = recommendation
+
+    def find_duplicates(self, lib):
+        """Return a list of items from `lib` that have the same artist
+        and title as the task.
+        """
+        artist, title = self.chosen_ident()
+
+        found_items = []
+        query = dbcore.AndQuery((
+            dbcore.MatchQuery('artist', artist),
+            dbcore.MatchQuery('title', title),
+        ))
+        for other_item in lib.items(query):
+            # Existing items not considered duplicates.
+            if other_item.path != self.item.path:
+                found_items.append(other_item)
+        return found_items
+
+    duplicate_items = find_duplicates
+
+    def add(self, lib):
+        with lib.transaction():
+            self.record_replaced(lib)
+            self.remove_replaced(lib)
+            lib.add(self.item)
+            self.reimport_metadata(lib)
+
+    def infer_album_fields(self):
+        raise NotImplementedError
+
+    def choose_match(self, session):
+        """Ask the session which match should apply and apply it.
+        """
+        choice = session.choose_item(self)
+        self.set_choice(choice)
+        session.log_choice(self)
+
+    def reload(self):
+        self.item.load()
+
+
+# FIXME The inheritance relationships are inverted. This is why there
+# are so many methods which pass. We should introduce a new
+# BaseImportTask class.
+class SentinelImportTask(ImportTask):
+    """This class marks the progress of an import and does not import
+    any items itself.
+
+    If only `toppath` is set, the task indicates the end of a top-level
+    directory import. If the `paths` argument is given, too, the task
+    indicates the progress in the `toppath` import.
+    """
+
+    def __init__(self, toppath=None, paths=None):
+        self.toppath = toppath
+        self.paths = paths
+        # TODO Remove the remaining attributes eventually
+        self.items = None
+        self.should_remove_duplicates = False
+        self.is_album = True
+        self.choice_flag = None
+
+    def save_history(self):
+        pass
+
+    def save_progress(self):
+        if self.paths is None:
+            # "Done" sentinel.
+            progress_reset(self.toppath)
+        else:
+            # "Directory progress" sentinel for singletons
+            progress_add(self.toppath, *self.paths)
+
+    def skip(self):
+        return True
+
+    def set_choice(self, choice):
+        raise NotImplementedError
+
+    def cleanup(self, **kwargs):
+        pass
+
+    def _emit_imported(self, session):
+        pass
+
+
+class ArchiveImportTask(SentinelImportTask):
+    """Additional methods for handling archives.
+
+    Use when `toppath` points to a `zip`, `tar`, or `rar` archive.
+ """ + + def __init__(self, toppath): + super(ArchiveImportTask, self).__init__(toppath) + self.extracted = False + + @classmethod + def is_archive(cls, path): + """Returns true if the given path points to an archive that can + be handled. + """ + if not os.path.isfile(path): + return False + + for path_test, _ in cls.handlers(): + if path_test(path): + return True + return False + + @classmethod + def handlers(cls): + """Returns a list of archive handlers. + + Each handler is a `(path_test, ArchiveClass)` tuple. `path_test` + is a function that returns `True` if the given path can be + handled by `ArchiveClass`. `ArchiveClass` is a class that + implements the same interface as `tarfile.TarFile`. + """ + if not hasattr(cls, '_handlers'): + cls._handlers = [] + from zipfile import is_zipfile, ZipFile + cls._handlers.append((is_zipfile, ZipFile)) + from tarfile import is_tarfile, TarFile + cls._handlers.append((is_tarfile, TarFile)) + try: + from rarfile import is_rarfile, RarFile + except ImportError: + pass + else: + cls._handlers.append((is_rarfile, RarFile)) + + return cls._handlers + + def cleanup(self, **kwargs): + """Removes the temporary directory the archive was extracted to. + """ + if self.extracted: + shutil.rmtree(self.toppath) + + def extract(self): + """Extracts the archive to a temporary directory and sets + `toppath` to that directory. + """ + for path_test, handler_class in self.handlers(): + if path_test(self.toppath): + break + + try: + extract_to = mkdtemp() + archive = handler_class(self.toppath, mode='r') + archive.extractall(extract_to) + finally: + archive.close() + self.extracted = True + self.toppath = extract_to + + +class ImportTaskFactory(object): + """Create album and singleton import tasks for all media files in a + directory or path. + + Depending on the session's 'flat' and 'singleton' configuration, it + groups all media files contained in `toppath` into singleton or + album import tasks. + """ + def __init__(self, toppath, session): + self.toppath = toppath + self.session = session + self.skipped = 0 + + def tasks(self): + """Yield all import tasks for `self.toppath`. + + The behavior is configured by the session's 'flat', and + 'singleton' flags. + """ + for dirs, paths in self.paths(): + if self.session.config['singletons']: + for path in paths: + task = self.singleton(path) + if task: + yield task + yield self.sentinel(dirs) + + else: + task = self.album(paths, dirs) + if task: + yield task + + def paths(self): + """Walk `self.toppath` and yield pairs of directory lists and + path lists. + """ + if not os.path.isdir(syspath(self.toppath)): + yield ([self.toppath], [self.toppath]) + elif self.session.config['flat']: + paths = [] + for dirs, paths_in_dir in albums_in_dir(self.toppath): + paths += paths_in_dir + yield ([self.toppath], paths) + else: + for dirs, paths in albums_in_dir(self.toppath): + yield (dirs, paths) + + def singleton(self, path): + if self.session.already_imported(self.toppath, [path]): + log.debug(u'Skipping previously-imported path: {0}' + .format(displayable_path(path))) + self.skipped += 1 + return None + + item = self.read_item(path) + if item: + return SingletonImportTask(self.toppath, item) + else: + return None + + def album(self, paths, dirs=None): + """Return `ImportTask` with all media files from paths. + + `dirs` is a list of parent directories used to record already + imported albums. 
+ """ + if not paths: + return None + + if dirs is None: + dirs = list(set(os.path.dirname(p) for p in paths)) + + if self.session.already_imported(self.toppath, dirs): + log.debug(u'Skipping previously-imported path: {0}' + .format(displayable_path(dirs))) + self.skipped += 1 + return None + + items = map(self.read_item, paths) + items = [item for item in items if item] + + if items: + return ImportTask(self.toppath, dirs, items) + else: + return None + + def sentinel(self, paths=None): + return SentinelImportTask(self.toppath, paths) + + def read_item(self, path): + """Return an item created from the path. + + If an item could not be read it returns None and logs an error. + """ + # TODO remove this method. Should be handled in ImportTask creation. + try: + return library.Item.from_path(path) + except library.ReadError as exc: + if isinstance(exc.reason, mediafile.FileTypeError): + # Silently ignore non-music files. + pass + elif isinstance(exc.reason, mediafile.UnreadableFileError): + log.warn(u'unreadable file: {0}'.format( + displayable_path(path)) + ) + else: + log.error(u'error reading {0}: {1}'.format( + displayable_path(path), + exc, + )) + + # Full-album pipeline stages. def read_tasks(session): @@ -536,133 +1072,104 @@ def read_tasks(session): in the user-specified list of paths. In the case of a singleton import, yields single-item tasks instead. """ - # Look for saved progress. - if _resume(): - resume_dirs = {} - for path in session.paths: - resume_dir = progress_get(path) - if resume_dir: - - # Either accept immediately or prompt for input to decide. - if _resume() is True: - do_resume = True - log.warn('Resuming interrupted import of %s' % path) - else: - do_resume = session.should_resume(path) - - if do_resume: - resume_dirs[path] = resume_dir - else: - # Clear progress; we're starting from the top. - progress_set(path, None) - - # Look for saved incremental directories. - if config['import']['incremental']: - incremental_skipped = 0 - history_dirs = history_get() - + skipped = 0 for toppath in session.paths: - # Check whether the path is to a file. - if config['import']['singletons'] and \ - not os.path.isdir(syspath(toppath)): + # Determine if we want to resume import of the toppath + session.ask_resume(toppath) + user_toppath = toppath + + # Extract archives. + archive_task = None + if ArchiveImportTask.is_archive(syspath(toppath)): + if not (session.config['move'] or session.config['copy']): + log.warn(u"Archive importing requires either " + "'copy' or 'move' to be enabled.") + continue + + log.debug(u'extracting archive {0}' + .format(displayable_path(toppath))) + archive_task = ArchiveImportTask(toppath) try: - item = library.Item.from_path(toppath) - except mediafile.UnreadableFileError: - log.warn(u'unreadable file: {0}'.format( - util.displayable_path(toppath) - )) - continue - yield ImportTask.item_task(item) - continue - - # A flat album import merges all items into one album. - if config['import']['flat'] and not config['import']['singletons']: - all_items = [] - for _, items in autotag.albums_in_dir(toppath): - all_items += items - yield ImportTask(toppath, toppath, all_items) - yield ImportTask.done_sentinel(toppath) - continue - - # Produce paths under this directory. - if _resume(): - resume_dir = resume_dirs.get(toppath) - for path, items in autotag.albums_in_dir(toppath): - # Skip according to progress. - if _resume() and resume_dir: - # We're fast-forwarding to resume a previous tagging. - if path == resume_dir: - # We've hit the last good path! 
Turn off the - # fast-forwarding. - resume_dir = None + archive_task.extract() + except Exception as exc: + log.error(u'extraction failed: {0}'.format(exc)) continue - # When incremental, skip paths in the history. - if config['import']['incremental'] and tuple(path) in history_dirs: - log.debug(u'Skipping previously-imported path: %s' % - displayable_path(path)) - incremental_skipped += 1 - continue + # Continue reading albums from the extracted directory. + toppath = archive_task.toppath - # Yield all the necessary tasks. - if config['import']['singletons']: - for item in items: - yield ImportTask.item_task(item) - yield ImportTask.progress_sentinel(toppath, path) - else: - yield ImportTask(toppath, path, items) + task_factory = ImportTaskFactory(toppath, session) + imported = False + for t in task_factory.tasks(): + imported |= not t.skip + yield t # Indicate the directory is finished. - yield ImportTask.done_sentinel(toppath) + # FIXME hack to delete extracted archives + if archive_task is None: + yield task_factory.sentinel() + else: + yield archive_task + + if not imported: + log.warn(u'No files imported from {0}' + .format(displayable_path(user_toppath))) # Show skipped directories. - if config['import']['incremental'] and incremental_skipped: - log.info(u'Incremental import: skipped %i directories.' % - incremental_skipped) + if skipped: + log.info(u'Skipped {0} directories.'.format(skipped)) + def query_tasks(session): """A generator that works as a drop-in-replacement for read_tasks. Instead of finding files from the filesystem, a query is used to match items from the library. """ - if config['import']['singletons']: + if session.config['singletons']: # Search for items. for item in session.lib.items(session.query): - yield ImportTask.item_task(item) + yield SingletonImportTask(None, item) else: # Search for albums. for album in session.lib.albums(session.query): - log.debug('yielding album %i: %s - %s' % - (album.id, album.albumartist, album.album)) + log.debug(u'yielding album {0}: {1} - {2}' + .format(album.id, album.albumartist, album.album)) items = list(album.items()) + + # Clear IDs from re-tagged items so they appear "fresh" when + # we add them back to the library. + for item in items: + item.id = None + item.album_id = None + yield ImportTask(None, [album.item_dir()], items) -def initial_lookup(session): + +@pipeline.mutator_stage +def lookup_candidates(session, task): """A coroutine for performing the initial MusicBrainz lookup for an album. It accepts lists of Items and yields (items, cur_artist, cur_album, candidates, rec) tuples. If no match is found, all of the yielded parameters (except items) are None. """ - task = None - while True: - task = yield task - if task.should_skip(): - continue + if task.skip: + # FIXME This gets duplicated a lot. We need a better + # abstraction. + return - plugins.send('import_task_start', session=session, task=task) + plugins.send('import_task_start', session=session, task=task) + log.debug(u'Looking up: {0}'.format(displayable_path(task.paths))) + task.lookup_candidates() - log.debug('Looking up: %s' % displayable_path(task.paths)) - task.set_candidates( - *autotag.tag_album(task.items) - ) -def user_query(session): +@pipeline.stage +def user_query(session, task): """A coroutine for interfacing with the user about the tagging process. The coroutine accepts an ImportTask objects. It uses the - session's ``choose_match`` method to determine the ``action`` for + session's `choose_match` method to determine the `action` for this task. 
Depending on the action additional stages are exectuted and the processed task is yielded. @@ -670,344 +1177,135 @@ def user_query(session): acces to the choice via the ``taks.choice_flag`` property and may choose to change it. """ - recent = set() - task = None - while True: - task = yield task - if task.should_skip(): - continue + if task.skip: + return task - # Ask the user for a choice. - choice = session.choose_match(task) - task.set_choice(choice) - session.log_choice(task) - plugins.send('import_task_choice', session=session, task=task) + # Ask the user for a choice. + task.choose_match(session) + plugins.send('import_task_choice', session=session, task=task) - # As-tracks: transition to singleton workflow. - if task.choice_flag is action.TRACKS: - # Set up a little pipeline for dealing with the singletons. - def emitter(task): - for item in task.items: - yield ImportTask.item_task(item) - yield ImportTask.progress_sentinel(task.toppath, task.paths) + # As-tracks: transition to singleton workflow. + if task.choice_flag is action.TRACKS: + # Set up a little pipeline for dealing with the singletons. + def emitter(task): + for item in task.items: + yield SingletonImportTask(task.toppath, item) + yield SentinelImportTask(task.toppath, task.paths) - ipl = pipeline.Pipeline([ - emitter(task), - item_lookup(session), - item_query(session), - ]) - task = pipeline.multiple(ipl.pull()) - continue + ipl = pipeline.Pipeline([ + emitter(task), + lookup_candidates(session), + user_query(session), + ]) + return pipeline.multiple(ipl.pull()) - # As albums: group items by albums and create task for each album - if task.choice_flag is action.ALBUMS: - def emitter(task): - yield task + # As albums: group items by albums and create task for each album + if task.choice_flag is action.ALBUMS: + ipl = pipeline.Pipeline([ + iter([task]), + group_albums(session), + lookup_candidates(session), + user_query(session) + ]) + return pipeline.multiple(ipl.pull()) - ipl = pipeline.Pipeline([ - emitter(task), - group_albums(session), - initial_lookup(session), - user_query(session) - ]) - task = pipeline.multiple(ipl.pull()) - continue + resolve_duplicates(session, task) + return task - # Check for duplicates if we have a match (or ASIS). - if task.choice_flag in (action.ASIS, action.APPLY): - ident = task.chosen_ident() - # The "recent" set keeps track of identifiers for recently - # imported albums -- those that haven't reached the database - # yet. - if ident in recent or _duplicate_check(session.lib, task): - session.resolve_duplicate(task) - session.log_choice(task, True) - recent.add(ident) -def show_progress(session): - """This stage replaces the initial_lookup and user_query stages - when the importer is run without autotagging. It displays the album - name and artist as the files are added. +def resolve_duplicates(session, task): + """Check if a task conflicts with items or albums already imported + and ask the session to resolve this. """ - task = None - while True: - task = yield task - if task.should_skip(): - continue + if task.choice_flag in (action.ASIS, action.APPLY): + ident = task.chosen_ident() + found_duplicates = task.find_duplicates(session.lib) + if ident in session.seen_idents or found_duplicates: + session.resolve_duplicate(task, found_duplicates) + session.log_choice(task, True) + session.seen_idents.add(ident) - log.info(displayable_path(task.paths)) - # Behave as if ASIS were selected. 
- task.set_null_candidates() - task.set_choice(action.ASIS) +@pipeline.mutator_stage +def import_asis(session, task): + """Select the `action.ASIS` choice for all tasks. -def apply_choices(session): + This stage replaces the initial_lookup and user_query stages + when the importer is run without autotagging. + """ + if task.skip: + return + + log.info(displayable_path(task.paths)) + task.set_choice(action.ASIS) + + +@pipeline.mutator_stage +def apply_choices(session, task): """A coroutine for applying changes to albums and singletons during the autotag process. """ - task = None - while True: - task = yield task - if task.should_skip(): - continue + if task.skip: + return - items = task.imported_items() - # Clear IDs in case the items are being re-tagged. - for item in items: - item.id = None - item.album_id = None + # Change metadata. + if task.apply: + task.apply_metadata() + plugins.send('import_task_apply', session=session, task=task) - # Change metadata. - if task.should_write_tags(): - if task.is_album: - autotag.apply_metadata( - task.match.info, task.match.mapping - ) - else: - autotag.apply_item_metadata(task.item, task.match.info) - plugins.send('import_task_apply', session=session, task=task) + task.add(session.lib) - # Infer album-level fields. - if task.is_album: - _infer_album_fields(task) - # Find existing item entries that these are replacing (for - # re-imports). Old album structures are automatically cleaned up - # when the last item is removed. - task.replaced_items = defaultdict(list) - for item in items: - dup_items = session.lib.items( - dbcore.query.BytesQuery('path', item.path) - ) - for dup_item in dup_items: - task.replaced_items[item].append(dup_item) - log.debug('replacing item %i: %s' % - (dup_item.id, displayable_path(item.path))) - log.debug('%i of %i items replaced' % (len(task.replaced_items), - len(items))) - - # Find old items that should be replaced as part of a duplicate - # resolution. - duplicate_items = [] - if task.remove_duplicates: - if task.is_album: - for album in _duplicate_check(session.lib, task): - duplicate_items += album.items() - else: - duplicate_items = _item_duplicate_check(session.lib, task) - log.debug('removing %i old duplicated items' % - len(duplicate_items)) - - # Delete duplicate files that are located inside the library - # directory. - task.duplicate_paths = [] - for duplicate_path in [i.path for i in duplicate_items]: - if session.lib.directory in util.ancestry(duplicate_path): - # Mark the path for deletion in the manipulate_files - # stage. - task.duplicate_paths.append(duplicate_path) - - # Add items -- before path changes -- to the library. We add the - # items now (rather than at the end) so that album structures - # are in place before calls to destination(). - with session.lib.transaction(): - # Remove old items. - for replaced in task.replaced_items.itervalues(): - for item in replaced: - item.remove() - for item in duplicate_items: - item.remove() - - # Add new ones. - if task.is_album: - # Add an album. - album = session.lib.add_album(items) - task.album_id = album.id - else: - # Add tracks. - for item in items: - session.lib.add(item) - -def plugin_stage(session, func): +@pipeline.mutator_stage +def plugin_stage(session, func, task): """A coroutine (pipeline stage) that calls the given function with each non-skipped import task. These stages occur between applying metadata changes and moving/copying/writing files. 
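A minimal sketch of the kind of function `plugin_stage` wraps, assuming the usual `BeetsPlugin.import_stages` hook; the plugin name and its body are hypothetical and not part of this patch:

from beets.plugins import BeetsPlugin

class ExampleStagePlugin(BeetsPlugin):
    def __init__(self):
        super(ExampleStagePlugin, self).__init__()
        # Functions listed here are wrapped by plugin_stage() above.
        self.import_stages = [self.stage]

    def stage(self, session, task):
        # Called once per non-skipped task, between apply_choices and
        # manipulate_files; any changes are picked up by task.reload().
        for item in task.imported_items():
            pass  # inspect or annotate each imported item here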
""" - task = None - while True: - task = yield task - if task.should_skip(): - continue - func(session, task) + if task.skip: + return - # Stage may modify DB, so re-load cached item data. - for item in task.imported_items(): - item.load() + func(session, task) -def manipulate_files(session): + # Stage may modify DB, so re-load cached item data. + # FIXME Importer plugins should not modify the database but instead + # the albums and items attached to tasks. + task.reload() + + +@pipeline.stage +def manipulate_files(session, task): """A coroutine (pipeline stage) that performs necessary file - manipulations *after* items have been added to the library. + manipulations *after* items have been added to the library and + finalizes each task. """ - task = None - while True: - task = yield task - if task.should_skip(): - continue + if not task.skip: + if task.should_remove_duplicates: + task.remove_duplicates(session.lib) - # Remove duplicate files marked for deletion. - if task.remove_duplicates: - for duplicate_path in task.duplicate_paths: - log.debug(u'deleting replaced duplicate %s' % - util.displayable_path(duplicate_path)) - util.remove(duplicate_path) - util.prune_dirs(os.path.dirname(duplicate_path), - session.lib.directory) + task.manipulate_files( + move=session.config['move'], + copy=session.config['copy'], + write=session.config['write'], + link=session.config['link'], + session=session, + ) - # Move/copy/write files. - items = task.imported_items() - # Save the original paths of all items for deletion and pruning - # in the next step (finalization). - task.old_paths = [item.path for item in items] - for item in items: - if config['import']['move']: - # Just move the file. - item.move(False) - elif config['import']['copy']: - # If it's a reimport, move in-library files and copy - # out-of-library files. Otherwise, copy and keep track - # of the old path. - old_path = item.path - if task.replaced_items[item]: - # This is a reimport. Move in-library files and copy - # out-of-library files. - if session.lib.directory in util.ancestry(old_path): - item.move(False) - # We moved the item, so remove the - # now-nonexistent file from old_paths. - task.old_paths.remove(old_path) - else: - item.move(True) - else: - # A normal import. Just copy files and keep track of - # old paths. - item.move(True) + # Progress, cleanup, and event. + task.finalize(session) - if config['import']['write'] and task.should_write_tags(): - try: - item.write() - except library.FileOperationError as exc: - log.error(exc) - # Save new paths. - with session.lib.transaction(): - for item in items: - item.store() - - # Plugin event. - plugins.send('import_task_files', session=session, task=task) - -def finalize(session): - """A coroutine that finishes up importer tasks. In particular, the - coroutine sends plugin events, deletes old files, and saves - progress. This is a "terminal" coroutine (it yields None). +@pipeline.stage +def log_files(session, task): + """A coroutine (pipeline stage) to log each file which will be imported """ - while True: - task = yield - if task.should_skip(): - if _resume(): - task.save_progress() - if config['import']['incremental']: - task.save_history() - continue - - items = task.imported_items() - - # Announce that we've added an album. 
- if task.is_album: - album = session.lib.get_album(task.album_id) - plugins.send('album_imported', - lib=session.lib, album=album) - else: - for item in items: - plugins.send('item_imported', - lib=session.lib, item=item) - - # When copying and deleting originals, delete old files. - if config['import']['copy'] and config['import']['delete']: - new_paths = [os.path.realpath(item.path) for item in items] - for old_path in task.old_paths: - # Only delete files that were actually copied. - if old_path not in new_paths: - util.remove(syspath(old_path), False) - task.prune(old_path) - - # When moving, prune empty directories containing the original - # files. - elif config['import']['move']: - for old_path in task.old_paths: - task.prune(old_path) - - # Update progress. - if _resume(): - task.save_progress() - if config['import']['incremental']: - task.save_history() - - -# Singleton pipeline stages. - -def item_lookup(session): - """A coroutine used to perform the initial MusicBrainz lookup for - an item task. - """ - task = None - while True: - task = yield task - if task.should_skip(): - continue - - plugins.send('import_task_start', session=session, task=task) - - task.set_item_candidates(*autotag.tag_item(task.item)) - -def item_query(session): - """A coroutine that queries the user for input on single-item - lookups. - """ - task = None - recent = set() - while True: - task = yield task - if task.should_skip(): - continue - - choice = session.choose_item(task) - task.set_choice(choice) - session.log_choice(task) - plugins.send('import_task_choice', session=session, task=task) - - # Duplicate check. - if task.choice_flag in (action.ASIS, action.APPLY): - ident = task.chosen_ident() - if ident in recent or _item_duplicate_check(session.lib, task): - session.resolve_duplicate(task) - session.log_choice(task, True) - recent.add(ident) - -def item_progress(session): - """Skips the lookup and query stages in a non-autotagged singleton - import. Just shows progress. - """ - task = None - log.info('Importing items:') - while True: - task = yield task - if task.should_skip(): - continue - - log.info(displayable_path(task.item.path)) - task.set_null_candidates() - task.set_choice(action.ASIS) + if isinstance(task, SingletonImportTask): + log.info( + 'Singleton: {0}'.format(displayable_path(task.item['path']))) + elif task.items: + log.info('Album {0}'.format(displayable_path(task.paths[0]))) + for item in task.items: + log.info(' {0}'.format(displayable_path(item['path']))) def group_albums(session): @@ -1020,11 +1318,111 @@ def group_albums(session): task = None while True: task = yield task - if task.should_skip(): + if task.skip: continue tasks = [] for _, items in itertools.groupby(task.items, group): tasks.append(ImportTask(items=list(items))) - tasks.append(ImportTask.progress_sentinel(task.toppath, task.paths)) + tasks.append(SentinelImportTask(task.toppath, task.paths)) task = pipeline.multiple(tasks) + + +MULTIDISC_MARKERS = (r'dis[ck]', r'cd') +MULTIDISC_PAT_FMT = r'^(.*%s[\W_]*)\d' + + +def albums_in_dir(path): + """Recursively searches the given directory and returns an iterable + of (paths, items) where paths is a list of directories and items is + a list of Items that is probably an album. Specifically, any folder + containing any media files is an album. 
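A rough usage sketch for `albums_in_dir`, assuming a hypothetical incoming directory; each yielded pair groups one probable album: the directories it spans (several for multi-disc layouts) and the paths of the media files found inside them:

from beets.importer import albums_in_dir

for dirs, file_paths in albums_in_dir(b'/music/incoming'):
    print '%d file(s) under %s' % (len(file_paths), b', '.join(dirs))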
+ """ + collapse_pat = collapse_paths = collapse_items = None + ignore = config['ignore'].as_str_seq() + + for root, dirs, files in sorted_walk(path, ignore=ignore, logger=log): + items = [os.path.join(root, f) for f in files] + # If we're currently collapsing the constituent directories in a + # multi-disc album, check whether we should continue collapsing + # and add the current directory. If so, just add the directory + # and move on to the next directory. If not, stop collapsing. + if collapse_paths: + if (not collapse_pat and collapse_paths[0] in ancestry(root)) or \ + (collapse_pat and + collapse_pat.match(os.path.basename(root))): + # Still collapsing. + collapse_paths.append(root) + collapse_items += items + continue + else: + # Collapse finished. Yield the collapsed directory and + # proceed to process the current one. + if collapse_items: + yield collapse_paths, collapse_items + collapse_pat = collapse_paths = collapse_items = None + + # Check whether this directory looks like the *first* directory + # in a multi-disc sequence. There are two indicators: the file + # is named like part of a multi-disc sequence (e.g., "Title Disc + # 1") or it contains no items but only directories that are + # named in this way. + start_collapsing = False + for marker in MULTIDISC_MARKERS: + marker_pat = re.compile(MULTIDISC_PAT_FMT % marker, re.I) + match = marker_pat.match(os.path.basename(root)) + + # Is this directory the root of a nested multi-disc album? + if dirs and not items: + # Check whether all subdirectories have the same prefix. + start_collapsing = True + subdir_pat = None + for subdir in dirs: + # The first directory dictates the pattern for + # the remaining directories. + if not subdir_pat: + match = marker_pat.match(subdir) + if match: + subdir_pat = re.compile( + r'^%s\d' % re.escape(match.group(1)), re.I + ) + else: + start_collapsing = False + break + + # Subsequent directories must match the pattern. + elif not subdir_pat.match(subdir): + start_collapsing = False + break + + # If all subdirectories match, don't check other + # markers. + if start_collapsing: + break + + # Is this directory the first in a flattened multi-disc album? + elif match: + start_collapsing = True + # Set the current pattern to match directories with the same + # prefix as this one, followed by a digit. + collapse_pat = re.compile( + r'^%s\d' % re.escape(match.group(1)), re.I + ) + break + + # If either of the above heuristics indicated that this is the + # beginning of a multi-disc album, initialize the collapsed + # directory and item lists and check the next directory. + if start_collapsing: + # Start collapsing; continue to the next iteration. + collapse_paths = [root] + collapse_items = items + continue + + # If it's nonempty, yield it. + if items: + yield [root], items + + # Clear out any unfinished collapse. + if collapse_paths and collapse_items: + yield collapse_paths, collapse_items diff --git a/lib/beets/library.py b/lib/beets/library.py index 94559430..1de1bba5 100644 --- a/lib/beets/library.py +++ b/lib/beets/library.py @@ -15,14 +15,14 @@ """The core data store and collection logic for beets. 
""" import os -import re import sys import logging import shlex import unicodedata import time +import re from unidecode import unidecode -from beets.mediafile import MediaFile, MutagenError +from beets.mediafile import MediaFile, MutagenError, UnreadableFileError from beets import plugins from beets import util from beets.util import bytestring_path, syspath, normpath, samefile @@ -32,12 +32,17 @@ from beets.dbcore import types import beets +log = logging.getLogger('beets') + # Library-specific query types. - class PathQuery(dbcore.FieldQuery): """A query that matches all items under a given path.""" + + escape_re = re.compile(r'[\\_%]') + escape_char = '\\' + def __init__(self, field, pattern, fast=True): super(PathQuery, self).__init__(field, pattern, fast) @@ -48,36 +53,22 @@ class PathQuery(dbcore.FieldQuery): def match(self, item): return (item.path == self.file_path) or \ - item.path.startswith(self.dir_path) + item.path.startswith(self.dir_path) def clause(self): - dir_pat = buffer(self.dir_path + '%') + escape = lambda m: self.escape_char + m.group(0) + dir_pattern = self.escape_re.sub(escape, self.dir_path) + dir_pattern = buffer(dir_pattern + '%') file_blob = buffer(self.file_path) - return '({0} = ?) || ({0} LIKE ?)'.format(self.field), \ - (file_blob, dir_pat) - - -class SingletonQuery(dbcore.Query): - """Matches either singleton or non-singleton items.""" - def __init__(self, sense): - self.sense = sense - - def clause(self): - if self.sense: - return "album_id ISNULL", () - else: - return "NOT album_id ISNULL", () - - def match(self, item): - return (not item.album_id) == self.sense - + return '({0} = ?) || ({0} LIKE ? ESCAPE ?)'.format(self.field), \ + (file_blob, dir_pattern, self.escape_char) # Library-specific field types. - -class DateType(types.Type): - sql = u'REAL' +class DateType(types.Float): + # TODO representation should be `datetime` object + # TODO distinguish beetween date and time types query = dbcore.query.DateQuery def format(self, value): @@ -95,12 +86,13 @@ class DateType(types.Type): try: return float(string) except ValueError: - return 0.0 + return self.null class PathType(types.Type): sql = u'BLOB' query = PathQuery + model_type = bytes def format(self, value): return util.displayable_path(value) @@ -108,158 +100,89 @@ class PathType(types.Type): def parse(self, string): return normpath(bytestring_path(string)) + def normalize(self, value): + if isinstance(value, unicode): + # Paths stored internally as encoded bytes. + return bytestring_path(value) + + elif isinstance(value, buffer): + # SQLite must store bytestings as buffers to avoid decoding. + # We unwrap buffers to bytes. + return bytes(value) + + else: + return value + + def from_sql(self, sql_value): + return self.normalize(sql_value) + + def to_sql(self, value): + if isinstance(value, str): + value = buffer(value) + return value -# Model field lists. +class MusicalKey(types.String): + """String representing the musical key of a song. + + The standard format is C, Cm, C#, C#m, etc. + """ + ENHARMONIC = { + r'db': 'c#', + r'eb': 'd#', + r'gb': 'f#', + r'ab': 'g#', + r'bb': 'a#', + } + + def parse(self, key): + key = key.lower() + for flat, sharp in self.ENHARMONIC.items(): + key = re.sub(flat, sharp, key) + key = re.sub(r'[\W\s]+minor', 'm', key) + return key.capitalize() + + def normalize(self, key): + if key is None: + return None + else: + return self.parse(key) -# Fields in the "items" database table; all the metadata available for -# items in the library. 
These are used directly in SQL; they are -# vulnerable to injection if accessible to the user. -# Each tuple has the following values: -# - The name of the field. -# - The (Python) type of the field. -# - Is the field writable? -# - Does the field reflect an attribute of a MediaFile? -ITEM_FIELDS = [ - ('id', types.Id(), False, False), - ('path', PathType(), False, False), - ('album_id', types.Integer(), False, False), +# Library-specific sort types. - ('title', types.String(), True, True), - ('artist', types.String(), True, True), - ('artist_sort', types.String(), True, True), - ('artist_credit', types.String(), True, True), - ('album', types.String(), True, True), - ('albumartist', types.String(), True, True), - ('albumartist_sort', types.String(), True, True), - ('albumartist_credit', types.String(), True, True), - ('genre', types.String(), True, True), - ('composer', types.String(), True, True), - ('grouping', types.String(), True, True), - ('year', types.PaddedInt(4), True, True), - ('month', types.PaddedInt(2), True, True), - ('day', types.PaddedInt(2), True, True), - ('track', types.PaddedInt(2), True, True), - ('tracktotal', types.PaddedInt(2), True, True), - ('disc', types.PaddedInt(2), True, True), - ('disctotal', types.PaddedInt(2), True, True), - ('lyrics', types.String(), True, True), - ('comments', types.String(), True, True), - ('bpm', types.Integer(), True, True), - ('comp', types.Boolean(), True, True), - ('mb_trackid', types.String(), True, True), - ('mb_albumid', types.String(), True, True), - ('mb_artistid', types.String(), True, True), - ('mb_albumartistid', types.String(), True, True), - ('albumtype', types.String(), True, True), - ('label', types.String(), True, True), - ('acoustid_fingerprint', types.String(), True, True), - ('acoustid_id', types.String(), True, True), - ('mb_releasegroupid', types.String(), True, True), - ('asin', types.String(), True, True), - ('catalognum', types.String(), True, True), - ('script', types.String(), True, True), - ('language', types.String(), True, True), - ('country', types.String(), True, True), - ('albumstatus', types.String(), True, True), - ('media', types.String(), True, True), - ('albumdisambig', types.String(), True, True), - ('disctitle', types.String(), True, True), - ('encoder', types.String(), True, True), - ('rg_track_gain', types.Float(), True, True), - ('rg_track_peak', types.Float(), True, True), - ('rg_album_gain', types.Float(), True, True), - ('rg_album_peak', types.Float(), True, True), - ('original_year', types.PaddedInt(4), True, True), - ('original_month', types.PaddedInt(2), True, True), - ('original_day', types.PaddedInt(2), True, True), +class SmartArtistSort(dbcore.query.Sort): + """Sort by artist (either album artist or track artist), + prioritizing the sort field over the raw field. 
+ """ + def __init__(self, model_cls, ascending=True): + self.album = model_cls is Album + self.ascending = ascending - ('length', types.Float(), False, True), - ('bitrate', types.ScaledInt(1000, u'kbps'), False, True), - ('format', types.String(), False, True), - ('samplerate', types.ScaledInt(1000, u'kHz'), False, True), - ('bitdepth', types.Integer(), False, True), - ('channels', types.Integer(), False, True), - ('mtime', DateType(), False, False), - ('added', DateType(), False, False), -] -ITEM_KEYS_WRITABLE = [f[0] for f in ITEM_FIELDS if f[3] and f[2]] -ITEM_KEYS_META = [f[0] for f in ITEM_FIELDS if f[3]] -ITEM_KEYS = [f[0] for f in ITEM_FIELDS] + def order_clause(self): + order = "ASC" if self.ascending else "DESC" + if self.album: + field = 'albumartist' + else: + field = 'artist' + return ('(CASE {0}_sort WHEN NULL THEN {0} ' + 'WHEN "" THEN {0} ' + 'ELSE {0}_sort END) {1}').format(field, order) -# Database fields for the "albums" table. -# The third entry in each tuple indicates whether the field reflects an -# identically-named field in the items table. -ALBUM_FIELDS = [ - ('id', types.Id(), False), - ('artpath', PathType(), False), - ('added', DateType(), True), - - ('albumartist', types.String(), True), - ('albumartist_sort', types.String(), True), - ('albumartist_credit', types.String(), True), - ('album', types.String(), True), - ('genre', types.String(), True), - ('year', types.PaddedInt(4), True), - ('month', types.PaddedInt(2), True), - ('day', types.PaddedInt(2), True), - ('tracktotal', types.PaddedInt(2), True), - ('disctotal', types.PaddedInt(2), True), - ('comp', types.Boolean(), True), - ('mb_albumid', types.String(), True), - ('mb_albumartistid', types.String(), True), - ('albumtype', types.String(), True), - ('label', types.String(), True), - ('mb_releasegroupid', types.String(), True), - ('asin', types.String(), True), - ('catalognum', types.String(), True), - ('script', types.String(), True), - ('language', types.String(), True), - ('country', types.String(), True), - ('albumstatus', types.String(), True), - ('media', types.String(), True), - ('albumdisambig', types.String(), True), - ('rg_album_gain', types.Float(), True), - ('rg_album_peak', types.Float(), True), - ('original_year', types.PaddedInt(4), True), - ('original_month', types.PaddedInt(2), True), - ('original_day', types.PaddedInt(2), True), -] -ALBUM_KEYS = [f[0] for f in ALBUM_FIELDS] -ALBUM_KEYS_ITEM = [f[0] for f in ALBUM_FIELDS if f[2]] - - -# Default search fields for each model. -ALBUM_DEFAULT_FIELDS = ('album', 'albumartist', 'genre') -ITEM_DEFAULT_FIELDS = ALBUM_DEFAULT_FIELDS + ('artist', 'title', 'comments') + def sort(self, objs): + if self.album: + key = lambda a: a.albumartist_sort or a.albumartist + else: + key = lambda i: i.artist_sort or i.artist + return sorted(objs, key=key, reverse=not self.ascending) # Special path format key. PF_KEY_DEFAULT = 'default' -# Logger. -log = logging.getLogger('beets') -if not log.handlers: - log.addHandler(logging.StreamHandler()) -log.propagate = False # Don't propagate to root handler. - - -# A little SQL utility. -def _orelse(exp1, exp2): - """Generates an SQLite expression that evaluates to exp1 if exp1 is - non-null and non-empty or exp2 otherwise. - """ - return ('(CASE {0} WHEN NULL THEN {1} ' - 'WHEN "" THEN {1} ' - 'ELSE {0} END)').format(exp1, exp2) - - - # Exceptions. - class FileOperationError(Exception): """Indicates an error when interacting with a file on disk. 
Possibilities include an unsupported media type, a permissions @@ -300,14 +223,11 @@ class WriteError(FileOperationError): return u'error writing ' + super(WriteError, self).__unicode__() - # Item and Album model classes. - class LibModel(dbcore.Model): """Shared concrete functionality for Items and Albums. """ - _bytes_keys = ('path', 'artpath') def _template_funcs(self): funcs = DefaultTemplateFunctions(self, self._db).functions() @@ -327,15 +247,146 @@ class LibModel(dbcore.Model): plugins.send('database_change', lib=self._db) +class FormattedItemMapping(dbcore.db.FormattedMapping): + """Add lookup for album-level fields. + + Album-level fields take precedence if `for_path` is true. + """ + + def __init__(self, item, for_path=False): + super(FormattedItemMapping, self).__init__(item, for_path) + self.album = item.get_album() + self.album_keys = [] + if self.album: + for key in self.album.keys(True): + if key in Album.item_keys or key not in item._fields.keys(): + self.album_keys.append(key) + self.all_keys = set(self.model_keys).union(self.album_keys) + + def _get(self, key): + """Get the value for a key, either from the album or the item. + Raise a KeyError for invalid keys. + """ + if self.for_path and key in self.album_keys: + return self._get_formatted(self.album, key) + elif key in self.model_keys: + return self._get_formatted(self.model, key) + elif key in self.album_keys: + return self._get_formatted(self.album, key) + else: + raise KeyError(key) + + def __getitem__(self, key): + """Get the value for a key. Certain unset values are remapped. + """ + value = self._get(key) + + # `artist` and `albumartist` fields fall back to one another. + # This is helpful in path formats when the album artist is unset + # on as-is imports. + if key == 'artist' and not value: + return self._get('albumartist') + elif key == 'albumartist' and not value: + return self._get('artist') + else: + return value + + def __iter__(self): + return iter(self.all_keys) + + def __len__(self): + return len(self.all_keys) + + class Item(LibModel): - _fields = dict((name, typ) for (name, typ, _, _) in ITEM_FIELDS) _table = 'items' _flex_table = 'item_attributes' - _search_fields = ITEM_DEFAULT_FIELDS + _fields = { + 'id': types.PRIMARY_ID, + 'path': PathType(), + 'album_id': types.FOREIGN_ID, + + 'title': types.STRING, + 'artist': types.STRING, + 'artist_sort': types.STRING, + 'artist_credit': types.STRING, + 'album': types.STRING, + 'albumartist': types.STRING, + 'albumartist_sort': types.STRING, + 'albumartist_credit': types.STRING, + 'genre': types.STRING, + 'composer': types.STRING, + 'grouping': types.STRING, + 'year': types.PaddedInt(4), + 'month': types.PaddedInt(2), + 'day': types.PaddedInt(2), + 'track': types.PaddedInt(2), + 'tracktotal': types.PaddedInt(2), + 'disc': types.PaddedInt(2), + 'disctotal': types.PaddedInt(2), + 'lyrics': types.STRING, + 'comments': types.STRING, + 'bpm': types.INTEGER, + 'comp': types.BOOLEAN, + 'mb_trackid': types.STRING, + 'mb_albumid': types.STRING, + 'mb_artistid': types.STRING, + 'mb_albumartistid': types.STRING, + 'albumtype': types.STRING, + 'label': types.STRING, + 'acoustid_fingerprint': types.STRING, + 'acoustid_id': types.STRING, + 'mb_releasegroupid': types.STRING, + 'asin': types.STRING, + 'catalognum': types.STRING, + 'script': types.STRING, + 'language': types.STRING, + 'country': types.STRING, + 'albumstatus': types.STRING, + 'media': types.STRING, + 'albumdisambig': types.STRING, + 'disctitle': types.STRING, + 'encoder': types.STRING, + 'rg_track_gain': 
types.NULL_FLOAT, + 'rg_track_peak': types.NULL_FLOAT, + 'rg_album_gain': types.NULL_FLOAT, + 'rg_album_peak': types.NULL_FLOAT, + 'original_year': types.PaddedInt(4), + 'original_month': types.PaddedInt(2), + 'original_day': types.PaddedInt(2), + 'initial_key': MusicalKey(), + + 'length': types.FLOAT, + 'bitrate': types.ScaledInt(1000, u'kbps'), + 'format': types.STRING, + 'samplerate': types.ScaledInt(1000, u'kHz'), + 'bitdepth': types.INTEGER, + 'channels': types.INTEGER, + 'mtime': DateType(), + 'added': DateType(), + } + + _search_fields = ('artist', 'title', 'comments', + 'album', 'albumartist', 'genre') + + _media_fields = set(MediaFile.readable_fields()) \ + .intersection(_fields.keys()) + """Set of item fields that are backed by `MediaFile` fields. + + Any kind of field (fixed, flexible, and computed) may be a media + field. Only these fields are read from disk in `read` and written in + `write`. + """ + + _formatter = FormattedItemMapping + + _sorts = {'artist': SmartArtistSort} @classmethod def _getters(cls): - return plugins.item_field_getters() + getters = plugins.item_field_getters() + getters['singleton'] = lambda i: i.album_id is None + return getters @classmethod def from_path(cls, path): @@ -357,13 +408,13 @@ class Item(LibModel): elif isinstance(value, buffer): value = str(value) - if key in ITEM_KEYS_WRITABLE: + if key in MediaFile.fields(): self.mtime = 0 # Reset mtime on dirty. super(Item, self).__setitem__(key, value) def update(self, values): - """Sett all key/value pairs in the mapping. If mtime is + """Set all key/value pairs in the mapping. If mtime is specified, it is not reset (as it might otherwise be). """ super(Item, self).update(values) @@ -379,12 +430,14 @@ class Item(LibModel): return None return self._db.get_album(self) - # Interaction with file metadata. def read(self, read_path=None): - """Read the metadata from the associated file. If read_path is - specified, read metadata from that file instead. + """Read the metadata from the associated file. + + If `read_path` is specified, read metadata from that file + instead. Updates all the properties in `_media_fields` + from the media file. Raises a `ReadError` if the file could not be read. """ @@ -393,20 +446,19 @@ class Item(LibModel): else: read_path = normpath(read_path) try: - f = MediaFile(syspath(read_path)) - except (OSError, IOError) as exc: + mediafile = MediaFile(syspath(read_path)) + except (OSError, IOError, UnreadableFileError) as exc: raise ReadError(read_path, exc) - for key in ITEM_KEYS_META: - value = getattr(f, key) + for key in self._media_fields: + value = getattr(mediafile, key) if isinstance(value, (int, long)): - # Filter values wider than 64 bits (in signed - # representation). SQLite cannot store them. - # py26: Post transition, we can use: + # Filter values wider than 64 bits (in signed representation). + # SQLite cannot store them. py26: Post transition, we can use: # value.bit_length() > 63 if abs(value) >= 2 ** 63: value = 0 - setattr(self, key, value) + self[key] = value # Database's mtime should now reflect the on-disk value. if read_path == self.path: @@ -414,33 +466,71 @@ class Item(LibModel): self.path = read_path - def write(self): - """Write the item's metadata to the associated file. + def write(self, path=None): + """Write the item's metadata to a media file. + + All fields in `_media_fields` are written to disk according to + the values on this object. Can raise either a `ReadError` or a `WriteError`. 
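A hedged usage sketch for the reworked `write`; `item` is assumed to be an existing library Item and the alternate path is hypothetical:

from beets.library import FileOperationError

try:
    item.write()                          # tag the item's own file
    item.write(path=b'/tmp/export.mp3')   # or tag another copy of it
except FileOperationError as exc:         # covers ReadError and WriteError
    print exc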
""" + if path is None: + path = self.path + else: + path = normpath(path) + + tags = dict(self) + plugins.send('write', item=self, path=path, tags=tags) + try: - f = MediaFile(syspath(self.path)) - except (OSError, IOError) as exc: + mediafile = MediaFile(syspath(path), + id3v23=beets.config['id3v23'].get(bool)) + except (OSError, IOError, UnreadableFileError) as exc: raise ReadError(self.path, exc) - plugins.send('write', item=self) - - for key in ITEM_KEYS_WRITABLE: - setattr(f, key, self[key]) + mediafile.update(tags) try: - f.save(id3v23=beets.config['id3v23'].get(bool)) + mediafile.save() except (OSError, IOError, MutagenError) as exc: raise WriteError(self.path, exc) # The file has a new mtime. - self.mtime = self.current_mtime() - plugins.send('after_write', item=self) + if path == self.path: + self.mtime = self.current_mtime() + plugins.send('after_write', item=self, path=path) + def try_write(self, path=None): + """Calls `write()` but catches and logs `FileOperationError` + exceptions. + + Returns `False` an exception was caught and `True` otherwise. + """ + try: + self.write(path) + return True + except FileOperationError as exc: + log.error(exc) + return False + + def try_sync(self, write=None): + """Synchronize the item with the database and the media file + tags, updating them with this object's current state. + + By default, the current `path` for the item is used to write + tags. If `write` is `False`, no tags are written. If `write` is + a path, tags are written to that file instead. + + Similar to calling :meth:`write` and :meth:`store`. + """ + if write is True: + write = None + if write is not False: + self.try_write(path=write) + self.store() # Files themselves. - def move_file(self, dest, copy=False): + def move_file(self, dest, copy=False, link=False): """Moves or copies the item's file, updating the path value if the move succeeds. If a file exists at ``dest``, then it is slightly modified to be unique. @@ -451,7 +541,13 @@ class Item(LibModel): util.copy(self.path, dest) plugins.send("item_copied", item=self, source=self.path, destination=dest) + elif link: + util.link(self.path, dest) + plugins.send("item_linked", item=self, source=self.path, + destination=dest) else: + plugins.send("before_item_moved", item=self, source=self.path, + destination=dest) util.move(self.path, dest) plugins.send("item_moved", item=self, source=self.path, destination=dest) @@ -465,7 +561,6 @@ class Item(LibModel): """ return int(os.path.getmtime(syspath(self.path))) - # Model methods. def remove(self, delete=False, with_album=True): @@ -491,13 +586,14 @@ class Item(LibModel): self._db._memotable = {} - def move(self, copy=False, basedir=None, with_album=True): + def move(self, copy=False, link=False, basedir=None, with_album=True): """Move the item to its designated location within the library directory (provided by destination()). Subdirectories are created as needed. If the operation succeeds, the item's path field is updated to reflect the new location. - If copy is True, moving the file is copied rather than moved. + If `copy` is true, moving the file is copied rather than moved. + Similarly, `link` creates a symlink instead. basedir overrides the library base directory for the destination. @@ -519,7 +615,7 @@ class Item(LibModel): # Perform the move and store the change. old_path = self.path - self.move_file(dest, copy) + self.move_file(dest, copy, link) self.store() # If this item is in an album, move its art. 
@@ -533,31 +629,8 @@ class Item(LibModel): if not copy: util.prune_dirs(os.path.dirname(old_path), self._db.directory) - # Templating. - def _formatted_mapping(self, for_path=False): - """Get a mapping containing string-formatted values from either - this item or the associated album, if any. - """ - mapping = super(Item, self)._formatted_mapping(for_path) - - # Merge in album-level fields. - album = self.get_album() - if album: - for key in album.keys(True): - if key in ALBUM_KEYS_ITEM or key not in ITEM_KEYS: - mapping[key] = album._get_formatted(key, for_path) - - # Use the album artist if the track artist is not set and - # vice-versa. - if not mapping['artist']: - mapping['artist'] = mapping['albumartist'] - if not mapping['albumartist']: - mapping['albumartist'] = mapping['artist'] - - return mapping - def destination(self, fragment=False, basedir=None, platform=None, path_formats=None): """Returns the path in the library directory designated for the @@ -577,7 +650,7 @@ class Item(LibModel): for query, path_format in path_formats: if query == PF_KEY_DEFAULT: continue - query = get_query(query, type(self)) + query, _ = parse_query_string(query, type(self)) if query.match(self): # The query matches the item! Use the corresponding path # format. @@ -602,8 +675,13 @@ class Item(LibModel): subpath = unicodedata.normalize('NFD', subpath) else: subpath = unicodedata.normalize('NFC', subpath) + + if beets.config['asciify_paths']: + subpath = unidecode(subpath) + # Truncate components and remove forbidden characters. subpath = util.sanitize_path(subpath, self._db.replacements) + # Encode for the filesystem. if not fragment: subpath = bytestring_path(subpath) @@ -633,10 +711,84 @@ class Album(LibModel): library. Reflects the library's "albums" table, including album art. 
""" - _fields = dict((name, typ) for (name, typ, _) in ALBUM_FIELDS) _table = 'albums' _flex_table = 'album_attributes' - _search_fields = ALBUM_DEFAULT_FIELDS + _always_dirty = True + _fields = { + 'id': types.PRIMARY_ID, + 'artpath': PathType(), + 'added': DateType(), + + 'albumartist': types.STRING, + 'albumartist_sort': types.STRING, + 'albumartist_credit': types.STRING, + 'album': types.STRING, + 'genre': types.STRING, + 'year': types.PaddedInt(4), + 'month': types.PaddedInt(2), + 'day': types.PaddedInt(2), + 'tracktotal': types.PaddedInt(2), + 'disctotal': types.PaddedInt(2), + 'comp': types.BOOLEAN, + 'mb_albumid': types.STRING, + 'mb_albumartistid': types.STRING, + 'albumtype': types.STRING, + 'label': types.STRING, + 'mb_releasegroupid': types.STRING, + 'asin': types.STRING, + 'catalognum': types.STRING, + 'script': types.STRING, + 'language': types.STRING, + 'country': types.STRING, + 'albumstatus': types.STRING, + 'albumdisambig': types.STRING, + 'rg_album_gain': types.NULL_FLOAT, + 'rg_album_peak': types.NULL_FLOAT, + 'original_year': types.PaddedInt(4), + 'original_month': types.PaddedInt(2), + 'original_day': types.PaddedInt(2), + } + + _search_fields = ('album', 'albumartist', 'genre') + + _sorts = { + 'albumartist': SmartArtistSort, + 'artist': SmartArtistSort, + } + + item_keys = [ + 'added', + 'albumartist', + 'albumartist_sort', + 'albumartist_credit', + 'album', + 'genre', + 'year', + 'month', + 'day', + 'tracktotal', + 'disctotal', + 'comp', + 'mb_albumid', + 'mb_albumartistid', + 'albumtype', + 'label', + 'mb_releasegroupid', + 'asin', + 'catalognum', + 'script', + 'language', + 'country', + 'albumstatus', + 'albumdisambig', + 'rg_album_gain', + 'rg_album_peak', + 'original_year', + 'original_month', + 'original_day', + ] + """List of keys that are set on an album's items. + """ @classmethod def _getters(cls): @@ -646,15 +798,6 @@ class Album(LibModel): getters['path'] = Album.item_dir return getters - def __setitem__(self, key, value): - """Set the value of an album attribute.""" - if key == 'artpath': - if isinstance(value, unicode): - value = bytestring_path(value) - elif isinstance(value, buffer): - value = bytes(value) - super(Album, self).__setitem__(key, value) - def items(self): """Returns an iterable over the items associated with this album. @@ -681,7 +824,7 @@ class Album(LibModel): for item in self.items(): item.remove(delete, False) - def move_art(self, copy=False): + def move_art(self, copy=False, link=False): """Move or copy any existing album art so that it remains in the same directory as the items. """ @@ -694,9 +837,13 @@ class Album(LibModel): return new_art = util.unique_path(new_art) - log.debug('moving album art %s to %s' % (old_art, new_art)) + log.debug(u'moving album art {0} to {1}' + .format(util.displayable_path(old_art), + util.displayable_path(new_art))) if copy: util.copy(old_art, new_art) + elif link: + util.link(old_art, new_art) else: util.move(old_art, new_art) self.artpath = new_art @@ -706,7 +853,7 @@ class Album(LibModel): util.prune_dirs(os.path.dirname(old_art), self._db.directory) - def move(self, copy=False, basedir=None): + def move(self, copy=False, link=False, basedir=None): """Moves (or copies) all items to their destination. Any album art moves along with them. basedir overrides the library base directory for the destination. The album is stored to the @@ -721,10 +868,10 @@ class Album(LibModel): # Move items. 
items = list(self.items()) for item in items: - item.move(copy, basedir=basedir, with_album=False) + item.move(copy, link, basedir=basedir, with_album=False) # Move art. - self.move_art(copy) + self.move_art(copy, link) self.store() def item_dir(self): @@ -750,6 +897,8 @@ class Album(LibModel): filename_tmpl = Template(beets.config['art_filename'].get(unicode)) subpath = self.evaluate_template(filename_tmpl, True) + if beets.config['asciify_paths']: + subpath = unidecode(subpath) subpath = util.sanitize_path(subpath, replacements=self._db.replacements) subpath = bytestring_path(subpath) @@ -792,7 +941,7 @@ class Album(LibModel): """ # Get modified track fields. track_updates = {} - for key in ALBUM_KEYS_ITEM: + for key in self.item_keys: if key in self._dirty: track_updates[key] = self[key] @@ -804,163 +953,82 @@ class Album(LibModel): item[key] = value item.store() + def try_sync(self, write=True): + """Synchronize the album and its items with the database and + their files by updating them with this object's current state. + + `write` indicates whether to write tags to the item files. + """ + self.store() + for item in self.items(): + item.try_sync(bool(write)) -# Query construction and parsing helpers. +# Query construction helpers. +def parse_query_parts(parts, model_cls): + """Given a beets query string as a list of components, return the + `Query` and `Sort` they represent. -PARSE_QUERY_PART_REGEX = re.compile( - # Non-capturing optional segment for the keyword. - r'(?:' - r'(\S+?)' # The field key. - r'(? (None, 'stapler', SubstringQuery) - 'color:red' -> ('color', 'red', SubstringQuery) - ':^Quiet' -> (None, '^Quiet', RegexpQuery) - 'color::b..e' -> ('color', 'b..e', RegexpQuery) - - Prefixes may be "escaped" with a backslash to disable the keying - behavior. + Like `dbcore.parse_sorted_query`, with beets query prefixes and + special path query detection. """ - part = part.strip() - match = PARSE_QUERY_PART_REGEX.match(part) - - assert match # Regex should always match. - key = match.group(1) - term = match.group(2).replace('\:', ':') - - # Match the search term against the list of prefixes. - for pre, query_class in prefixes.items(): - if term.startswith(pre): - return key, term[len(pre):], query_class - - # No matching prefix: use type-based or fallback/default query. - query_class = query_classes.get(key, default_class) - return key, term, query_class - - -def construct_query_part(query_part, model_cls): - """Create a query from a single query component, `query_part`, for - querying instances of `model_cls`. Return a `Query` instance. - """ - # Shortcut for empty query parts. - if not query_part: - return dbcore.query.TrueQuery() - - # Set up and parse the string. - query_classes = dict((k, t.query) for (k, t) in model_cls._fields.items()) + # Get query types and their prefix characters. prefixes = {':': dbcore.query.RegexpQuery} prefixes.update(plugins.queries()) - key, pattern, query_class = \ - parse_query_part(query_part, query_classes, prefixes) - # No key specified. - if key is None: - if os.sep in pattern and 'path' in model_cls._fields: - # This looks like a path. - return PathQuery('path', pattern) - elif issubclass(query_class, dbcore.FieldQuery): - # The query type matches a specific field, but none was - # specified. So we use a version of the query that matches - # any field. - return dbcore.query.AnyFieldQuery(pattern, - model_cls._search_fields, - query_class) - else: - # Other query type. 
- return query_class(pattern) - - key = key.lower() - - # Singleton query (not a real field). - if key == 'singleton': - return SingletonQuery(util.str2bool(pattern)) - - # Other field. + # Special-case path-like queries, which are non-field queries + # containing path separators (/). + if 'path' in model_cls._fields: + path_parts = [] + non_path_parts = [] + for s in parts: + if s.find(os.sep, 0, s.find(':')) != -1: + # Separator precedes colon. + path_parts.append(s) + else: + non_path_parts.append(s) else: - return query_class(key.lower(), pattern, key in model_cls._fields) + path_parts = () + non_path_parts = parts + + query, sort = dbcore.parse_sorted_query( + model_cls, non_path_parts, prefixes + ) + + # Add path queries to aggregate query. + if path_parts: + query.subqueries += [PathQuery('path', s) for s in path_parts] + return query, sort -def query_from_strings(query_cls, model_cls, query_parts): - """Creates a collection query of type `query_cls` from a list of - strings in the format used by parse_query_part. `model_cls` - determines how queries are constructed from strings. +def parse_query_string(s, model_cls): + """Given a beets query string, return the `Query` and `Sort` they + represent. + + The string is split into components using shell-like syntax. """ - subqueries = [] - for part in query_parts: - subqueries.append(construct_query_part(part, model_cls)) - if not subqueries: # No terms in query. - subqueries = [dbcore.query.TrueQuery()] - return query_cls(subqueries) - - -def get_query(val, model_cls): - """Takes a value which may be None, a query string, a query string - list, or a Query object, and returns a suitable Query object. - `model_cls` is the subclass of Model indicating which entity this - is a query for (i.e., Album or Item) and is used to determine which - fields are searched. - """ - # Convert a single string into a list of space-separated - # criteria. - if isinstance(val, basestring): - # A bug in Python < 2.7.3 prevents correct shlex splitting of - # Unicode strings. - # http://bugs.python.org/issue6988 - if isinstance(val, unicode): - val = val.encode('utf8') - val = [s.decode('utf8') for s in shlex.split(val)] - - if val is None: - return dbcore.query.TrueQuery() - elif isinstance(val, list) or isinstance(val, tuple): - return query_from_strings(dbcore.AndQuery, model_cls, val) - elif isinstance(val, dbcore.Query): - return val - else: - raise ValueError('query must be None or have type Query or str') - + # A bug in Python < 2.7.3 prevents correct shlex splitting of + # Unicode strings. + # http://bugs.python.org/issue6988 + if isinstance(s, unicode): + s = s.encode('utf8') + parts = [p.decode('utf8') for p in shlex.split(s)] + return parse_query_parts(parts, model_cls) # The Library: interface to the database. - class Library(dbcore.Database): """A database of music containing songs and albums. """ _models = (Item, Album) def __init__(self, path='library.blb', - directory='~/Music', - path_formats=((PF_KEY_DEFAULT, - '$artist/$album/$track $title'),), - replacements=None): + directory='~/Music', + path_formats=((PF_KEY_DEFAULT, + '$artist/$album/$track $title'),), + replacements=None): if path != ':memory:': self.path = bytestring_path(normpath(path)) super(Library, self).__init__(path) @@ -971,7 +1039,6 @@ class Library(dbcore.Database): self._memotable = {} # Used for template substitution performance. - # Adding objects to the database. 
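A sketch of the new query-parsing entry point defined above, assuming an existing `Library` instance `lib`; the query text is only an example:

from beets import library

query, sort = library.parse_query_string(u'albumartist:radiohead year:2007',
                                          library.Item)
titles = [item.title for item in lib.items(query)]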
def add(self, obj): @@ -983,12 +1050,17 @@ class Library(dbcore.Database): return obj.id def add_album(self, items): - """Create a new album consisting of a list of items. The items - are added to the database if they don't yet have an ID. Return a - new :class:`Album` object. + """Create a new album consisting of a list of items. + + The items are added to the database if they don't yet have an + ID. Return a new :class:`Album` object. The list of items must not + be empty. """ + if not items: + raise ValueError(u'need at least one item') + # Create the album structure using metadata from the first item. - values = dict((key, items[0][key]) for key in ALBUM_KEYS_ITEM) + values = dict((key, items[0][key]) for key in Album.item_keys) album = Album(self, **values) # Add the album structure and set the items' album_id fields. @@ -1004,34 +1076,43 @@ class Library(dbcore.Database): return album - # Querying. - def _fetch(self, model_cls, query, order_by=None): - """Parse a query and fetch. + def _fetch(self, model_cls, query, sort=None): + """Parse a query and fetch. If an order specification is present + in the query string, the `sort` argument is ignored. """ + # Parse the query, if necessary. + parsed_sort = None + if isinstance(query, basestring): + query, parsed_sort = parse_query_string(query, model_cls) + elif isinstance(query, (list, tuple)): + query, parsed_sort = parse_query_parts(query, model_cls) + + # Any non-null sort specified by the parsed query overrides the + # provided sort. + if parsed_sort and not isinstance(parsed_sort, dbcore.query.NullSort): + sort = parsed_sort + return super(Library, self)._fetch( - model_cls, get_query(query, model_cls), order_by + model_cls, query, sort ) - def albums(self, query=None): - """Get a sorted list of :class:`Album` objects matching the - given query. + def albums(self, query=None, sort=None): + """Get :class:`Album` objects matching the query. """ - order = '{0}, album'.format( - _orelse("albumartist_sort", "albumartist") + sort = sort or dbcore.sort_from_strings( + Album, beets.config['sort_album'].as_str_seq() ) - return self._fetch(Album, query, order) + return self._fetch(Album, query, sort) - def items(self, query=None): - """Get a sorted list of :class:`Item` objects matching the given - query. + def items(self, query=None, sort=None): + """Get :class:`Item` objects matching the query. """ - order = '{0}, album, disc, track'.format( - _orelse("artist_sort", "artist") + sort = sort or dbcore.sort_from_strings( + Item, beets.config['sort_item'].as_str_seq() ) - return self._fetch(Item, query, order) - + return self._fetch(Item, query, sort) # Convenience accessors. @@ -1055,10 +1136,8 @@ class Library(dbcore.Database): return self._get(Album, album_id) - # Default path template resources. - def _int_arg(s): """Convert a string argument to an integer for use in a template function. May raise a ValueError. @@ -1123,9 +1202,13 @@ class DefaultTemplateFunctions(object): otherwise, emit ``falseval`` (if provided). """ try: - condition = _int_arg(condition) + int_condition = _int_arg(condition) except ValueError: - condition = condition.strip() + if condition.lower() == "false": + return falseval + else: + condition = int_condition + if condition: return trueval else: @@ -1204,7 +1287,7 @@ class DefaultTemplateFunctions(object): return res # Flatten disambiguation value into a string.
- disam_value = album._get_formatted(disambiguator, True) + disam_value = album.formatted(True).get(disambiguator) res = u' [{0}]'.format(disam_value) self.lib._memotable[memokey] = res return res diff --git a/lib/beets/mediafile.py b/lib/beets/mediafile.py index 301e0f37..49ef1037 100644 --- a/lib/beets/mediafile.py +++ b/lib/beets/mediafile.py @@ -40,6 +40,7 @@ import mutagen.mp4 import mutagen.flac import mutagen.monkeysaudio import mutagen.asf +import mutagen.aiff import datetime import re import base64 @@ -49,38 +50,15 @@ import imghdr import os import logging import traceback -from beets.util.enumeration import enum +import enum + +from beets.util import displayable_path + __all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] - - -# Logger. log = logging.getLogger('beets') - - -# Exceptions. - -class UnreadableFileError(Exception): - """Indicates a file that MediaFile can't read. - """ - pass - -class FileTypeError(UnreadableFileError): - """Raised for files that don't seem to have a type MediaFile - supports. - """ - pass - -class MutagenError(UnreadableFileError): - """Raised when Mutagen fails unexpectedly---probably due to a bug. - """ - - - -# Constants. - # Human-readable type names. TYPES = { 'mp3': 'MP3', @@ -93,9 +71,41 @@ TYPES = { 'wv': 'WavPack', 'mpc': 'Musepack', 'asf': 'Windows Media', + 'aiff': 'AIFF', } +# Exceptions. + +class UnreadableFileError(Exception): + """Mutagen is not able to extract information from the file. + """ + def __init__(self, path): + Exception.__init__(self, displayable_path(path)) + + +class FileTypeError(UnreadableFileError): + """Reading this type of file is not supported. + + If passed the `mutagen_type` argument this indicates that the + mutagen type is not supported by `Mediafile`. + """ + def __init__(self, path, mutagen_type=None): + path = displayable_path(path) + if mutagen_type is None: + msg = path + else: + msg = u'{0}: of mutagen type {1}'.format(path, mutagen_type) + Exception.__init__(self, msg) + + +class MutagenError(UnreadableFileError): + """Raised when Mutagen fails unexpectedly---probably due to a bug. + """ + def __init__(self, path, mutagen_exc): + msg = u'{0}: {1}'.format(displayable_path(path), mutagen_exc) + Exception.__init__(self, msg) + # Utility. @@ -105,10 +115,11 @@ def _safe_cast(out_type, val): returned. out_type should be bool, int, or unicode; otherwise, the value is just passed through. """ + if val is None: + return None + if out_type == int: - if val is None: - return 0 - elif isinstance(val, int) or isinstance(val, float): + if isinstance(val, int) or isinstance(val, float): # Just a number. 
return int(val) else: @@ -123,45 +134,37 @@ def _safe_cast(out_type, val): return int(val) elif out_type == bool: - if val is None: + try: + # Should work for strings, bools, ints: + return bool(int(val)) + except ValueError: return False - else: - try: - # Should work for strings, bools, ints: - return bool(int(val)) - except ValueError: - return False elif out_type == unicode: - if val is None: - return u'' + if isinstance(val, str): + return val.decode('utf8', 'ignore') + elif isinstance(val, unicode): + return val else: - if isinstance(val, str): - return val.decode('utf8', 'ignore') - elif isinstance(val, unicode): - return val - else: - return unicode(val) + return unicode(val) elif out_type == float: - if val is None: - return 0.0 - elif isinstance(val, int) or isinstance(val, float): + if isinstance(val, int) or isinstance(val, float): return float(val) else: if not isinstance(val, basestring): val = unicode(val) - val = re.match(r'[\+-]?[0-9\.]*', val.strip()).group(0) - if not val: - return 0.0 - else: - return float(val) + match = re.match(r'[\+-]?[0-9\.]+', val.strip()) + if match: + val = match.group(0) + if val: + return float(val) + return 0.0 else: return val - # Image coding for ASF/WMA. def _unpack_asf_image(data): @@ -189,6 +192,7 @@ def _unpack_asf_image(data): return (mime.decode("utf-16-le"), image_data, type, description.decode("utf-16-le")) + def _pack_asf_image(mime, data, type=3, description=""): """Pack image data for a WM/Picture tag. """ @@ -199,7 +203,6 @@ def _pack_asf_image(mime, data, type=3, description=""): return tag_data - # iTunes Sound Check encoding. def _sc_decode(soundcheck): @@ -211,7 +214,7 @@ def _sc_decode(soundcheck): try: soundcheck = soundcheck.replace(' ', '').decode('hex') soundcheck = struct.unpack('!iiiiiiiiii', soundcheck) - except (struct.error, TypeError): + except (struct.error, TypeError, UnicodeEncodeError): # SoundCheck isn't in the format we expect, so return default # values. return 0.0, 0.0 @@ -237,6 +240,7 @@ def _sc_decode(soundcheck): return round(gain, 2), round(peak, 6) + def _sc_encode(gain, peak): """Encode ReplayGain gain/peak values as a Sound Check string. """ @@ -261,10 +265,8 @@ def _sc_encode(gain, peak): return (u' %08X' * 10) % values - # Cover art and other images. - def _image_mime_type(data): """Return the MIME type of the image data (a bytestring). """ @@ -283,6 +285,32 @@ def _image_mime_type(data): return 'image/x-{0}'.format(kind) +class ImageType(enum.Enum): + """Indicates the kind of an `Image` stored in a file's tag. + """ + other = 0 + icon = 1 + other_icon = 2 + front = 3 + back = 4 + leaflet = 5 + media = 6 + lead_artist = 7 + artist = 8 + conductor = 9 + group = 10 + composer = 11 + lyricist = 12 + recording_location = 13 + recording_session = 14 + performance = 15 + screen_capture = 16 + fish = 17 + illustration = 18 + artist_logo = 19 + publisher_logo = 20 + + class Image(object): """Strucuture representing image data and metadata that can be stored and retrieved from tags. @@ -290,40 +318,15 @@ class Image(object): The structure has four properties. * ``data`` The binary data of the image * ``desc`` An optional descritpion of the image - * ``type`` A string denoting the type in relation to the music. - Must be one of the ``TYPES`` enum. 
+ * ``type`` An instance of `ImageType` indicating the kind of image * ``mime_type`` Read-only property that contains the mime type of the binary data """ - TYPES = enum([ - 'other', - 'icon', - 'other icon', - 'front', - 'back', - 'leaflet', - 'media', - 'lead artist', - 'artist', - 'conductor', - 'group', - 'composer', - 'lyricist', - 'recording location', - 'recording session', - 'performance', - 'screen capture', - 'fish', - 'illustration', - 'artist logo', - 'publisher logo', - ], name='TageImage.TYPES') - def __init__(self, data, desc=None, type=None): self.data = data self.desc = desc if isinstance(type, int): - type = self.TYPES[type] + type = list(ImageType)[type] self.type = type @property @@ -334,9 +337,10 @@ class Image(object): @property def type_index(self): if self.type is None: - return None - return list(self.TYPES).index(self.type) - + # This method is used when a tag format requires the type + # index to be set, so we return "other" as the default value. + return 0 + return self.type.value # StorageStyle classes describe strategies for accessing values in @@ -349,8 +353,9 @@ class StorageStyle(object): describe more sophisticated translations or format-specific access strategies. - MediaFile uses a StorageStyle via two methods: ``get()`` and - ``set()``. It passes a Mutagen file object to each. + MediaFile uses a StorageStyle via three methods: ``get()``, + ``set()``, and ``delete()``. It passes a Mutagen file object to + each. Internally, the StorageStyle implements ``get()`` and ``set()`` using two steps that may be overridden by subtypes. To get a value, @@ -406,7 +411,7 @@ class StorageStyle(object): """ try: return mutagen_file[self.key][0] - except KeyError: + except (KeyError, IndexError): return None def deserialize(self, mutagen_value): @@ -414,7 +419,7 @@ class StorageStyle(object): return the represented value. """ if self.suffix and isinstance(mutagen_value, unicode) \ - and mutagen_value.endswith(self.suffix): + and mutagen_value.endswith(self.suffix): return mutagen_value[:-len(self.suffix)] else: return mutagen_value @@ -454,6 +459,12 @@ class StorageStyle(object): return value + def delete(self, mutagen_file): + """Remove the tag from the file. + """ + if self.key in mutagen_file: + del mutagen_file[self.key] + class ListStorageStyle(StorageStyle): """Abstract storage style that provides access to lists. @@ -517,9 +528,7 @@ class SoundCheckStorageStyleMixin(object): """ def get(self, mutagen_file): data = self.fetch(mutagen_file) - if data is None: - return 0 - else: + if data is not None: return _sc_decode(data)[self.index] def set(self, mutagen_file, value): @@ -570,7 +579,13 @@ class MP4TupleStorageStyle(MP4StorageStyle): return list(items) + [0] * (packing_length - len(items)) def get(self, mutagen_file): - return super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] + value = super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] + if value == 0: + # The values are always present and saved as integers. So we + # assume that "0" indicates it is not set. 
+ return None + else: + return value def set(self, mutagen_file, value): if value is None: @@ -579,16 +594,23 @@ class MP4TupleStorageStyle(MP4StorageStyle): items[self.index] = int(value) self.store(mutagen_file, items) + def delete(self, mutagen_file): + if self.index == 0: + super(MP4TupleStorageStyle, self).delete(mutagen_file) + else: + self.set(mutagen_file, None) + class MP4ListStorageStyle(ListStorageStyle, MP4StorageStyle): pass class MP4SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP4StorageStyle): - def __init__(self, index=0, **kwargs): - super(MP4SoundCheckStorageStyle, self).__init__(**kwargs) + def __init__(self, key, index=0, **kwargs): + super(MP4SoundCheckStorageStyle, self).__init__(key, **kwargs) self.index = index + class MP4BoolStorageStyle(MP4StorageStyle): """A style for booleans in MPEG-4 files. (MPEG-4 has an atom type specifically for representing booleans.) @@ -631,7 +653,7 @@ class MP4ImageStorageStyle(MP4ListStorageStyle): class MP3StorageStyle(StorageStyle): """Store data in ID3 frames. """ - formats = ['MP3'] + formats = ['MP3', 'AIFF'] def __init__(self, key, id3_lang=None, **kwargs): """Create a new ID3 storage style. `id3_lang` is the value for @@ -643,7 +665,7 @@ class MP3StorageStyle(StorageStyle): def fetch(self, mutagen_file): try: return mutagen_file[self.key].text[0] - except KeyError: + except (KeyError, IndexError): return None def store(self, mutagen_file, value): @@ -713,7 +735,10 @@ class MP3DescStorageStyle(MP3StorageStyle): # need to make a new frame? if not found: frame = mutagen.id3.Frames[self.key]( - desc=str(self.description), text=value, encoding=3) + desc=str(self.description), + text=value, + encoding=3 + ) if self.id3_lang: frame.lang = self.id3_lang mutagen_file.tags.add(frame) @@ -728,6 +753,15 @@ class MP3DescStorageStyle(MP3StorageStyle): except IndexError: return None + def delete(self, mutagen_file): + found_frame = None + for frame in mutagen_file.tags.getall(self.key): + if frame.desc.lower() == self.description.lower(): + found_frame = frame + break + if found_frame is not None: + del mutagen_file[frame.HashKey] + class MP3SlashPackStorageStyle(MP3StorageStyle): """Store value as part of pair that is serialized as a slash- @@ -738,29 +772,38 @@ class MP3SlashPackStorageStyle(MP3StorageStyle): self.pack_pos = pack_pos def _fetch_unpacked(self, mutagen_file): - data = self.fetch(mutagen_file) or '' - items = unicode(data).split('/') + data = self.fetch(mutagen_file) + if data: + items = unicode(data).split('/') + else: + items = [] packing_length = 2 return list(items) + [None] * (packing_length - len(items)) def get(self, mutagen_file): - return self._fetch_unpacked(mutagen_file)[self.pack_pos] or 0 + return self._fetch_unpacked(mutagen_file)[self.pack_pos] def set(self, mutagen_file, value): items = self._fetch_unpacked(mutagen_file) items[self.pack_pos] = value if items[0] is None: - items[0] = 0 + items[0] = '' if items[1] is None: items.pop() # Do not store last value self.store(mutagen_file, '/'.join(map(unicode, items))) + def delete(self, mutagen_file): + if self.pack_pos == 0: + super(MP3SlashPackStorageStyle, self).delete(mutagen_file) + else: + self.set(mutagen_file, None) + class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): """Converts between APIC frames and ``Image`` instances. The `get_list` method inherited from ``ListStorageStyle`` returns a - list of ``Image``s. Similarily the `set_list` method accepts a + list of ``Image``s. 
Similarly, the `set_list` method accepts a list of ``Image``s as its ``values`` arguemnt. """ def __init__(self): @@ -778,6 +821,9 @@ class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): def store(self, mutagen_file, frames): mutagen_file.tags.setall(self.key, frames) + def delete(self, mutagen_file): + mutagen_file.tags.delall(self.key) + def serialize(self, image): """Return an APIC frame populated with data from ``image``. """ @@ -787,7 +833,7 @@ class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): frame.mime = image.mime_type frame.desc = (image.desc or u'').encode('utf8') frame.encoding = 3 # UTF-8 encoding of desc - frame.type = image.type_index or 3 # front cover + frame.type = image.type_index return frame @@ -814,7 +860,7 @@ class ASFImageStorageStyle(ListStorageStyle): def serialize(self, image): pic = mutagen.asf.ASFByteArrayAttribute() pic.value = _pack_asf_image(image.mime_type, image.data, - type=image.type_index or 3, + type=image.type_index, description=image.desc or u'') return pic @@ -825,11 +871,12 @@ class VorbisImageStorageStyle(ListStorageStyle): base64-encoded. Values are `Image` objects. """ formats = ['OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', - 'OggFlac', 'APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio'] + 'OggFlac'] def __init__(self): super(VorbisImageStorageStyle, self).__init__( - key='metadata_block_picture') + key='metadata_block_picture' + ) self.as_type = str def fetch(self, mutagen_file): @@ -846,7 +893,7 @@ class VorbisImageStorageStyle(ListStorageStyle): except (TypeError, AttributeError): continue images.append(Image(data=pic.data, desc=pic.desc, - type=pic.type)) + type=pic.type)) return images def store(self, mutagen_file, image_data): @@ -862,7 +909,7 @@ class VorbisImageStorageStyle(ListStorageStyle): """ pic = mutagen.flac.Picture() pic.data = image.data - pic.type = image.type_index or 3 # Front cover + pic.type = image.type_index pic.mime = image.mime_type pic.desc = image.desc or u'' return base64.b64encode(pic.write()) @@ -895,11 +942,83 @@ class FlacImageStorageStyle(ListStorageStyle): """ pic = mutagen.flac.Picture() pic.data = image.data - pic.type = image.type_index or 3 # Front cover + pic.type = image.type_index pic.mime = image.mime_type pic.desc = image.desc or u'' return pic + def delete(self, mutagen_file): + """Remove all images from the file. + """ + mutagen_file.clear_pictures() + + +class APEv2ImageStorageStyle(ListStorageStyle): + """Store images in APEv2 tags. Values are `Image` objects. 
+ """ + formats = ['APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio', 'OptimFROG'] + + TAG_NAMES = { + ImageType.other: 'Cover Art (other)', + ImageType.icon: 'Cover Art (icon)', + ImageType.other_icon: 'Cover Art (other icon)', + ImageType.front: 'Cover Art (front)', + ImageType.back: 'Cover Art (back)', + ImageType.leaflet: 'Cover Art (leaflet)', + ImageType.media: 'Cover Art (media)', + ImageType.lead_artist: 'Cover Art (lead)', + ImageType.artist: 'Cover Art (artist)', + ImageType.conductor: 'Cover Art (conductor)', + ImageType.group: 'Cover Art (band)', + ImageType.composer: 'Cover Art (composer)', + ImageType.lyricist: 'Cover Art (lyricist)', + ImageType.recording_location: 'Cover Art (studio)', + ImageType.recording_session: 'Cover Art (recording)', + ImageType.performance: 'Cover Art (performance)', + ImageType.screen_capture: 'Cover Art (movie scene)', + ImageType.fish: 'Cover Art (colored fish)', + ImageType.illustration: 'Cover Art (illustration)', + ImageType.artist_logo: 'Cover Art (band logo)', + ImageType.publisher_logo: 'Cover Art (publisher logo)', + } + + def __init__(self): + super(APEv2ImageStorageStyle, self).__init__(key='') + + def fetch(self, mutagen_file): + images = [] + for cover_type, cover_tag in self.TAG_NAMES.items(): + try: + frame = mutagen_file[cover_tag] + text_delimiter_index = frame.value.find('\x00') + comment = frame.value[0:text_delimiter_index] \ + if text_delimiter_index > 0 else None + image_data = frame.value[text_delimiter_index + 1:] + images.append(Image(data=image_data, type=cover_type, + desc=comment)) + except KeyError: + pass + + return images + + def set_list(self, mutagen_file, values): + self.delete(mutagen_file) + + for image in values: + image_type = image.type or ImageType.other + comment = image.desc or '' + image_data = comment + "\x00" + image.data + cover_tag = self.TAG_NAMES[image_type] + mutagen_file[cover_tag] = image_data + + def delete(self, mutagen_file): + """Remove all images from the file. + """ + for cover_tag in self.TAG_NAMES.values(): + try: + del mutagen_file[cover_tag] + except KeyError: + pass # MediaField is a descriptor that represents a single logical field. It @@ -913,12 +1032,14 @@ class MediaField(object): def __init__(self, *styles, **kwargs): """Creates a new MediaField. - - `styles`: `StorageStyle` instances that describe the strategy - for reading and writing the field in particular formats. - There must be at least one style for each possible file - format. - - `out_type`: the type of the value that should be returned when - getting this property. + :param styles: `StorageStyle` instances that describe the strategy + for reading and writing the field in particular + formats. There must be at least one style for + each possible file format. + + :param out_type: the type of the value that should be returned when + getting this property. + """ self.out_type = kwargs.get('out_type', unicode) self._styles = styles @@ -945,6 +1066,10 @@ class MediaField(object): for style in self.styles(mediafile.mgfile): style.set(mediafile.mgfile, value) + def __delete__(self, mediafile): + for style in self.styles(mediafile.mgfile): + style.delete(mediafile.mgfile) + def _none_value(self): """Get an appropriate "null" value for this field's type. This is used internally when setting the field to None. 
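Note: the new ``__delete__`` hook on ``MediaField`` simply fans out to each applicable ``StorageStyle.delete()``, the same way ``__get__``/``__set__`` fan out to ``get()``/``set()``. A minimal sketch of that pattern follows; the ``ToyStyle``/``ToyField``/``ToyFile`` names are hypothetical stand-ins and a plain dict takes the place of a Mutagen file object:

    class ToyStyle(object):
        """Reads/writes one tag key in a dict standing in for a Mutagen file."""
        def __init__(self, key):
            self.key = key

        def get(self, tags):
            return tags.get(self.key)

        def set(self, tags, value):
            tags[self.key] = value

        def delete(self, tags):
            tags.pop(self.key, None)

    class ToyField(object):
        """Descriptor that fans one logical field out over several styles."""
        def __init__(self, *styles):
            self.styles = styles

        def __get__(self, obj, owner=None):
            if obj is None:
                return self
            for style in self.styles:
                value = style.get(obj.tags)
                if value is not None:
                    return value

        def __set__(self, obj, value):
            for style in self.styles:
                style.set(obj.tags, value)

        def __delete__(self, obj):
            # Mirrors MediaField.__delete__: every style drops its tag.
            for style in self.styles:
                style.delete(obj.tags)

    class ToyFile(object):
        title = ToyField(ToyStyle('TIT2'), ToyStyle('TITLE'))

        def __init__(self):
            self.tags = {}

    f = ToyFile()
    f.title = u'Blackwater Park'  # both keys written
    del f.title                   # both keys removed again

The real classes additionally handle serialization, suffixes, and list values, but the control flow is the same.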
@@ -1007,27 +1132,40 @@ class DateField(MediaField): def __get__(self, mediafile, owner=None): year, month, day = self._get_date_tuple(mediafile) + if not year: + return None try: return datetime.date( - year or datetime.MINYEAR, + year, month or 1, day or 1 ) except ValueError: # Out of range values. - return datetime.date.min + return None def __set__(self, mediafile, date): - self._set_date_tuple(mediafile, date.year, date.month, date.day) + if date is None: + self._set_date_tuple(mediafile, None, None, None) + else: + self._set_date_tuple(mediafile, date.year, date.month, date.day) + + def __delete__(self, mediafile): + super(DateField, self).__delete__(mediafile) + if hasattr(self, '_year_field'): + self._year_field.__delete__(mediafile) def _get_date_tuple(self, mediafile): """Get a 3-item sequence representing the date consisting of a year, month, and day number. Each number is either an integer or None. """ - # Get the underlying data and split on hyphens. + # Get the underlying data and split on hyphens and slashes. datestring = super(DateField, self).__get__(mediafile, None) - datestring = re.sub(r'[Tt ].*$', '', unicode(datestring)) - items = unicode(datestring).split('-') + if isinstance(datestring, basestring): + datestring = re.sub(r'[Tt ].*$', '', unicode(datestring)) + items = re.split('[-/]', unicode(datestring)) + else: + items = [] # Ensure that we have exactly 3 components, possibly by # truncating or padding. @@ -1040,20 +1178,30 @@ class DateField(MediaField): items[0] = self._year_field.__get__(mediafile) # Convert each component to an integer if possible. - return [_safe_cast(int, item) for item in items] + items_ = [] + for item in items: + try: + items_.append(int(item)) + except: + items_.append(None) + return items_ def _set_date_tuple(self, mediafile, year, month=None, day=None): """Set the value of the field given a year, month, and day number. Each number can be an integer or None to indicate an unset component. """ - date = [year or 0] + if year is None: + self.__delete__(mediafile) + return + + date = [u'{0:04d}'.format(int(year))] if month: - date.append(month) + date.append(u'{0:02d}'.format(int(month))) if month and day: - date.append(day) + date.append(u'{0:02d}'.format(int(day))) date = map(unicode, date) - super(DateField, self).__set__(mediafile, '-'.join(date)) + super(DateField, self).__set__(mediafile, u'-'.join(date)) if hasattr(self, '_year_field'): self._year_field.__set__(mediafile, year) @@ -1084,6 +1232,9 @@ class DateItemField(MediaField): items[self.item_pos] = value self.date_field._set_date_tuple(mediafile, *items) + def __delete__(self, mediafile): + self.__set__(mediafile, None) + class CoverArtField(MediaField): """A descriptor that provides access to the *raw image data* for the @@ -1105,8 +1256,11 @@ class CoverArtField(MediaField): else: mediafile.images = [] + def __delete__(self, mediafile): + delattr(mediafile, 'images') -class ImageListField(MediaField): + +class ImageListField(ListMediaField): """Descriptor to access the list of images embedded in tags. 
The getter returns a list of `Image` instances obtained from @@ -1123,30 +1277,23 @@ class ImageListField(MediaField): ASFImageStorageStyle(), VorbisImageStorageStyle(), FlacImageStorageStyle(), + APEv2ImageStorageStyle(), + out_type=Image, ) - def __get__(self, mediafile, _): - images = [] - for style in self.styles(mediafile.mgfile): - images.extend(style.get_list(mediafile.mgfile)) - return images - - def __set__(self, mediafile, images): - for style in self.styles(mediafile.mgfile): - style.set_list(mediafile.mgfile, images) - - # MediaFile is a collection of fields. - class MediaFile(object): """Represents a multimedia file on disk and provides access to its metadata. """ - def __init__(self, path): - """Constructs a new MediaFile reflecting the file at path. May - throw UnreadableFileError. + def __init__(self, path, id3v23=False): + """Constructs a new `MediaFile` reflecting the file at path. May + throw `UnreadableFileError`. + + By default, MP3 files are saved with ID3v2.4 tags. You can use + the older ID3v2.3 standard by specifying the `id3v23` option. """ self.path = path @@ -1161,12 +1308,13 @@ class MediaFile(object): mutagen.ogg.error, mutagen.asf.error, mutagen.apev2.error, + mutagen.aiff.error, ) try: self.mgfile = mutagen.File(path) except unreadable_exc as exc: log.debug(u'header parsing failed: {0}'.format(unicode(exc))) - raise UnreadableFileError('Mutagen could not read file') + raise UnreadableFileError(path) except IOError as exc: if type(exc) == IOError: # This is a base IOError, not a subclass from Mutagen or @@ -1174,28 +1322,35 @@ class MediaFile(object): raise else: log.debug(traceback.format_exc()) - raise MutagenError('Mutagen raised an exception') + raise MutagenError(path, exc) except Exception as exc: # Isolate bugs in Mutagen. log.debug(traceback.format_exc()) - log.error('uncaught Mutagen exception in open: {0}'.format(exc)) - raise MutagenError('Mutagen raised an exception') + log.error(u'uncaught Mutagen exception in open: {0}'.format(exc)) + raise MutagenError(path, exc) - if self.mgfile is None: # Mutagen couldn't guess the type - raise FileTypeError('file type unsupported by Mutagen') - elif type(self.mgfile).__name__ == 'M4A' or \ - type(self.mgfile).__name__ == 'MP4': - # This hack differentiates AAC and ALAC until we find a more - # deterministic approach. Mutagen only sets the sample rate - # for AAC files. See: - # https://github.com/sampsyo/beets/pull/295 - if hasattr(self.mgfile.info, 'sample_rate') and \ - self.mgfile.info.sample_rate > 0: - self.type = 'aac' + if self.mgfile is None: + # Mutagen couldn't guess the type + raise FileTypeError(path) + elif (type(self.mgfile).__name__ == 'M4A' or + type(self.mgfile).__name__ == 'MP4'): + info = self.mgfile.info + if hasattr(info, 'codec'): + if info.codec and info.codec.startswith('alac'): + self.type = 'alac' + else: + self.type = 'aac' else: - self.type = 'alac' - elif type(self.mgfile).__name__ == 'ID3' or \ - type(self.mgfile).__name__ == 'MP3': + # This hack differentiates AAC and ALAC on versions of + # Mutagen < 1.26. Once Mutagen > 1.26 is out and + # required by beets, we can remove this. 
+ if hasattr(self.mgfile.info, 'bitrate') and \ + self.mgfile.info.bitrate > 0: + self.type = 'aac' + else: + self.type = 'alac' + elif (type(self.mgfile).__name__ == 'ID3' or + type(self.mgfile).__name__ == 'MP3'): self.type = 'mp3' elif type(self.mgfile).__name__ == 'FLAC': self.type = 'flac' @@ -1211,22 +1366,24 @@ class MediaFile(object): self.type = 'mpc' elif type(self.mgfile).__name__ == 'ASF': self.type = 'asf' + elif type(self.mgfile).__name__ == 'AIFF': + self.type = 'aiff' else: - raise FileTypeError('file type %s unsupported by MediaFile' % - type(self.mgfile).__name__) + raise FileTypeError(path, type(self.mgfile).__name__) - # add a set of tags if it's missing + # Add a set of tags if it's missing. if self.mgfile.tags is None: self.mgfile.add_tags() - def save(self, id3v23=False): - """Write the object's tags back to the file. + # Set the ID3v2.3 flag only for MP3s. + self.id3v23 = id3v23 and self.type == 'mp3' - By default, MP3 files are saved with ID3v2.4 tags. You can use - the older ID3v2.3 standard by specifying the `id3v23` option. + def save(self): + """Write the object's tags back to the file. """ + # Possibly save the tags to ID3v2.3. kwargs = {} - if id3v23 and self.type == 'mp3': + if self.id3v23: id3 = self.mgfile if hasattr(id3, 'tags'): # In case this is an MP3 object, not an ID3 object. @@ -1242,8 +1399,8 @@ class MediaFile(object): raise except Exception as exc: log.debug(traceback.format_exc()) - log.error('uncaught Mutagen exception in save: {0}'.format(exc)) - raise MutagenError('Mutagen raised an exception') + log.error(u'uncaught Mutagen exception in save: {0}'.format(exc)) + raise MutagenError(self.path, exc) def delete(self): """Remove the current metadata tag from the file. @@ -1256,6 +1413,60 @@ class MediaFile(object): for tag in self.mgfile.keys(): del self.mgfile[tag] + # Convenient access to the set of available fields. + + @classmethod + def fields(cls): + """Get the names of all writable properties that reflect + metadata tags (i.e., those that are instances of + :class:`MediaField`). + """ + for property, descriptor in cls.__dict__.items(): + if isinstance(descriptor, MediaField): + yield property + + @classmethod + def readable_fields(cls): + """Get all metadata fields: the writable ones from + :meth:`fields` and also other audio properties. + """ + for property in cls.fields(): + yield property + for property in ('length', 'samplerate', 'bitdepth', 'bitrate', + 'channels', 'format'): + yield property + + @classmethod + def add_field(cls, name, descriptor): + """Add a field to store custom tags. + + :param name: the name of the property the field is accessed + through. It must not already exist on this class. + + :param descriptor: an instance of :class:`MediaField`. + """ + if not isinstance(descriptor, MediaField): + raise ValueError( + u'{0} must be an instance of MediaField'.format(descriptor)) + if name in cls.__dict__: + raise ValueError( + u'property "{0}" already exists on MediaField'.format(name)) + setattr(cls, name, descriptor) + + def update(self, dict): + """Set all field values from a dictionary. + + For any key in `dict` that is also a field to store tags the + method retrieves the corresponding value from `dict` and updates + the `MediaFile`. If a key has the value `None`, the + corresponding property is deleted from the `MediaFile`. + """ + for field in self.fields(): + if field in dict: + if dict[field] is None: + delattr(self, field) + else: + setattr(self, field, dict[field]) # Field definitions. 
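Note: together, ``fields()``, ``add_field()`` and ``update()`` give callers one uniform way to register and synchronize custom tags. A rough usage sketch, assuming a real MP3 exists at the (hypothetical) path below; the ``barcode`` field name and tag keys are made up for illustration:

    from beets import mediafile

    mediafile.MediaFile.add_field('barcode', mediafile.MediaField(
        mediafile.MP3DescStorageStyle(u'BARCODE'),
        mediafile.MP4StorageStyle('----:com.apple.iTunes:BARCODE'),
        mediafile.StorageStyle('BARCODE'),
        mediafile.ASFStorageStyle('WM/Barcode'),
    ))

    f = mediafile.MediaFile('/path/to/track.mp3')  # hypothetical path
    f.update({'barcode': u'5099902988313'})        # sets the new property
    f.save()
    del f.barcode                                  # drops the tag in every style
    f.save()

After ``add_field()``, the new property also shows up in ``MediaFile.fields()`` and so participates in ``update()`` like the built-in fields.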
@@ -1343,6 +1554,7 @@ class MediaFile(object): StorageStyle('DESCRIPTION'), StorageStyle('COMMENT'), ASFStorageStyle('WM/Comments'), + ASFStorageStyle('Description') ) bpm = MediaField( MP3StorageStyle('TBPM'), @@ -1430,8 +1642,8 @@ class MediaFile(object): ) country = MediaField( MP3DescStorageStyle('MusicBrainz Album Release Country'), - MP4StorageStyle("----:com.apple.iTunes:MusicBrainz Album " - "Release Country"), + MP4StorageStyle("----:com.apple.iTunes:MusicBrainz " + "Album Release Country"), StorageStyle('RELEASECOUNTRY'), ASFStorageStyle('MusicBrainz/Album Release Country'), ) @@ -1546,66 +1758,111 @@ class MediaFile(object): # ReplayGain fields. rg_track_gain = MediaField( - MP3DescStorageStyle(u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB'), - MP3DescStorageStyle(u'replaygain_track_gain', - float_places=2, suffix=u' dB'), - MP3SoundCheckStorageStyle(key='COMM', index=0, desc=u'iTunNORM', - id3_lang='eng'), - MP4StorageStyle(key='----:com.apple.iTunes:replaygain_track_gain', - float_places=2, suffix=b' dB'), - MP4SoundCheckStorageStyle(key='----:com.apple.iTunes:iTunNORM', - index=0), - StorageStyle(u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB'), - ASFStorageStyle(u'replaygain_track_gain', - float_places=2, suffix=u' dB'), + MP3DescStorageStyle( + u'REPLAYGAIN_TRACK_GAIN', + float_places=2, suffix=u' dB' + ), + MP3DescStorageStyle( + u'replaygain_track_gain', + float_places=2, suffix=u' dB' + ), + MP3SoundCheckStorageStyle( + key='COMM', + index=0, desc=u'iTunNORM', + id3_lang='eng' + ), + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_track_gain', + float_places=2, suffix=b' dB' + ), + MP4SoundCheckStorageStyle( + '----:com.apple.iTunes:iTunNORM', + index=0 + ), + StorageStyle( + u'REPLAYGAIN_TRACK_GAIN', + float_places=2, suffix=u' dB' + ), + ASFStorageStyle( + u'replaygain_track_gain', + float_places=2, suffix=u' dB' + ), out_type=float ) rg_album_gain = MediaField( - MP3DescStorageStyle(u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB'), - MP3DescStorageStyle(u'replaygain_album_gain', - float_places=2, suffix=u' dB'), - MP4SoundCheckStorageStyle(key='----:com.apple.iTunes:iTunNORM', - index=1), - StorageStyle(u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB'), - ASFStorageStyle(u'replaygain_album_gain', - float_places=2, suffix=u' dB'), + MP3DescStorageStyle( + u'REPLAYGAIN_ALBUM_GAIN', + float_places=2, suffix=u' dB' + ), + MP3DescStorageStyle( + u'replaygain_album_gain', + float_places=2, suffix=u' dB' + ), + MP4SoundCheckStorageStyle( + '----:com.apple.iTunes:iTunNORM', + index=1 + ), + StorageStyle( + u'REPLAYGAIN_ALBUM_GAIN', + float_places=2, suffix=u' dB' + ), + ASFStorageStyle( + u'replaygain_album_gain', + float_places=2, suffix=u' dB' + ), out_type=float ) rg_track_peak = MediaField( - MP3DescStorageStyle(u'REPLAYGAIN_TRACK_PEAK', - float_places=6), - MP3DescStorageStyle(u'replaygain_track_peak', - float_places=6), - MP3SoundCheckStorageStyle(key='COMM', index=1, desc=u'iTunNORM', - id3_lang='eng'), - MP4StorageStyle('----:com.apple.iTunes:replaygain_track_peak', - float_places=6), - MP4SoundCheckStorageStyle(key='----:com.apple.iTunes:iTunNORM', - index=1), - StorageStyle(u'REPLAYGAIN_TRACK_PEAK', - float_places=6), - ASFStorageStyle(u'replaygain_track_peak', - float_places=6), + MP3DescStorageStyle( + u'REPLAYGAIN_TRACK_PEAK', + float_places=6 + ), + MP3DescStorageStyle( + u'replaygain_track_peak', + float_places=6 + ), + MP3SoundCheckStorageStyle( + key=u'COMM', + index=1, desc=u'iTunNORM', + id3_lang='eng' + ), + 
MP4StorageStyle( + '----:com.apple.iTunes:replaygain_track_peak', + float_places=6 + ), + MP4SoundCheckStorageStyle( + '----:com.apple.iTunes:iTunNORM', + index=1 + ), + StorageStyle(u'REPLAYGAIN_TRACK_PEAK', float_places=6), + ASFStorageStyle(u'replaygain_track_peak', float_places=6), out_type=float, ) rg_album_peak = MediaField( - MP3DescStorageStyle(u'REPLAYGAIN_ALBUM_PEAK', - float_places=6), - MP3DescStorageStyle(u'replaygain_album_peak', - float_places=6), - MP4StorageStyle('----:com.apple.iTunes:replaygain_album_peak', - float_places=6), - StorageStyle(u'REPLAYGAIN_ALBUM_PEAK', - float_places=6), - ASFStorageStyle(u'replaygain_album_peak', - float_places=6), + MP3DescStorageStyle( + u'REPLAYGAIN_ALBUM_PEAK', + float_places=6 + ), + MP3DescStorageStyle( + u'replaygain_album_peak', + float_places=6 + ), + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_album_peak', + float_places=6 + ), + StorageStyle(u'REPLAYGAIN_ALBUM_PEAK', float_places=6), + ASFStorageStyle(u'replaygain_album_peak', float_places=6), out_type=float, ) + initial_key = MediaField( + MP3StorageStyle('TKEY'), + MP4StorageStyle('----:com.apple.iTunes:initialkey'), + StorageStyle('INITIALKEY'), + ASFStorageStyle('INITIALKEY'), + ) + @property def length(self): """The duration of the audio in seconds (a float).""" diff --git a/lib/beets/plugins.py b/lib/beets/plugins.py index 6a58777c..8611b92a 100755 --- a/lib/beets/plugins.py +++ b/lib/beets/plugins.py @@ -16,8 +16,11 @@ import logging import traceback +import inspect +import re from collections import defaultdict + import beets from beets import mediafile @@ -30,6 +33,14 @@ LASTFM_KEY = '2dc3914abf35f0d9c92d97d8f8e42b43' log = logging.getLogger('beets') +class PluginConflictException(Exception): + """Indicates that the services provided by one plugin conflict with + those of another. + + For example two plugins may define different types for flexible fields. + """ + + # Managing the plugins themselves. class BeetsPlugin(object): @@ -40,7 +51,6 @@ class BeetsPlugin(object): def __init__(self, name=None): """Perform one-time plugin setup. """ - _add_media_fields(self.item_fields()) self.import_stages = [] self.name = name or self.__module__.split('.')[-1] self.config = beets.config[self.name] @@ -86,14 +96,6 @@ class BeetsPlugin(object): """ return () - def item_fields(self): - """Returns field descriptors to be added to the MediaFile class, - in the form of a dictionary whose keys are field names and whose - values are descriptor (e.g., MediaField) instances. The Library - database schema is not (currently) extended. - """ - return {} - def album_for_id(self, album_id): """Return an AlbumInfo object or None if no matching release was found. @@ -106,6 +108,20 @@ class BeetsPlugin(object): """ return None + def add_media_field(self, name, descriptor): + """Add a field that is synchronized between media files and items. + + When a media field is added ``item.write()`` will set the name + property of the item's MediaFile to ``item[name]`` and save the + changes. Similarly ``item.read()`` will set ``item[name]`` to + the value of the name property of the media file. + + ``descriptor`` must be an instance of ``mediafile.MediaField``. + """ + # Defer impor to prevent circular dependency + from beets import library + mediafile.MediaFile.add_field(name, descriptor) + library.Item._media_fields.add(name) listeners = None @@ -130,7 +146,7 @@ class BeetsPlugin(object): >>> @MyPlugin.listen("imported") >>> def importListener(**kwargs): - >>> pass + ... 
pass """ def helper(func): if cls.listeners is None: @@ -170,7 +186,10 @@ class BeetsPlugin(object): return func return helper + _classes = set() + + def load_plugins(names=()): """Imports the modules for a sequence of plugin names. Each name must be the name of a Python module under the "beetsplug" namespace @@ -185,7 +204,7 @@ def load_plugins(names=()): except ImportError as exc: # Again, this is hacky: if exc.args[0].endswith(' ' + name): - log.warn('** plugin %s not found' % name) + log.warn(u'** plugin {0} not found'.format(name)) else: raise else: @@ -195,10 +214,13 @@ def load_plugins(names=()): _classes.add(obj) except: - log.warn('** error loading plugin %s' % name) + log.warn(u'** error loading plugin {0}'.format(name)) log.warn(traceback.format_exc()) + _instances = {} + + def find_plugins(): """Returns a list of BeetsPlugin subclass instances from all currently loaded beets plugins. Loads the default plugin set @@ -224,6 +246,7 @@ def commands(): out += plugin.commands() return out + def queries(): """Returns a dict mapping prefix strings to Query subclasses all loaded plugins. @@ -233,6 +256,24 @@ def queries(): out.update(plugin.queries()) return out + +def types(model_cls): + # Gives us `item_types` and `album_types` + attr_name = '{0}_types'.format(model_cls.__name__.lower()) + types = {} + for plugin in find_plugins(): + plugin_types = getattr(plugin, attr_name, {}) + for field in plugin_types: + if field in types and plugin_types[field] != types[field]: + raise PluginConflictException( + u'Plugin {0} defines flexible field {1} ' + 'which has already been defined with ' + 'another type.'.format(plugin.name, field) + ) + types.update(plugin_types) + return types + + def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. Returns a Distance object. @@ -243,6 +284,7 @@ def track_distance(item, info): dist.update(plugin.track_distance(item, info)) return dist + def album_distance(items, album_info, mapping): """Returns the album distance calculated by plugins.""" from beets.autotag.hooks import Distance @@ -251,6 +293,7 @@ def album_distance(items, album_info, mapping): dist.update(plugin.album_distance(items, album_info, mapping)) return dist + def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. """ @@ -259,6 +302,7 @@ def candidates(items, artist, album, va_likely): out.extend(plugin.candidates(items, artist, album, va_likely)) return out + def item_candidates(item, artist, title): """Gets MusicBrainz candidates for an item from the plugins. """ @@ -267,6 +311,7 @@ def item_candidates(item, artist, title): out.extend(plugin.item_candidates(item, artist, title)) return out + def album_for_id(album_id): """Get AlbumInfo objects for a given ID string. """ @@ -277,6 +322,7 @@ def album_for_id(album_id): out.append(res) return out + def track_for_id(track_id): """Get TrackInfo objects for a given ID string. """ @@ -287,6 +333,7 @@ def track_for_id(track_id): out.append(res) return out + def template_funcs(): """Get all the template functions declared by plugins as a dictionary. @@ -297,12 +344,6 @@ def template_funcs(): funcs.update(plugin.template_funcs) return funcs -def _add_media_fields(fields): - """Adds a {name: descriptor} dictionary of fields to the MediaFile - class. Called during the plugin initialization. 
- """ - for key, value in fields.iteritems(): - setattr(mediafile.MediaFile, key, value) def import_stages(): """Get a list of import stage functions defined by plugins.""" @@ -325,6 +366,7 @@ def item_field_getters(): funcs.update(plugin.template_fields) return funcs + def album_field_getters(): """As above, for album fields. """ @@ -348,6 +390,7 @@ def event_handlers(): all_handlers[event] += handlers return all_handlers + def send(event, **arguments): """Sends an event to all assigned event listeners. Event is the name of the event to send, all other named arguments go to the @@ -355,5 +398,38 @@ def send(event, **arguments): Returns a list of return values from the handlers. """ - log.debug('Sending event: %s' % event) - return [handler(**arguments) for handler in event_handlers()[event]] + log.debug(u'Sending event: {0}'.format(event)) + for handler in event_handlers()[event]: + # Don't break legacy plugins if we want to pass more arguments + argspec = inspect.getargspec(handler).args + args = dict((k, v) for k, v in arguments.items() if k in argspec) + handler(**args) + + +def feat_tokens(for_artist=True): + """Return a regular expression that matches phrases like "featuring" + that separate a main artist or a song title from secondary artists. + The `for_artist` option determines whether the regex should be + suitable for matching artist fields (the default) or title fields. + """ + feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.'] + if for_artist: + feat_words += ['with', 'vs', 'and', 'con', '&'] + return '(?<=\s)(?:{0})(?=\s)'.format( + '|'.join(re.escape(x) for x in feat_words) + ) + + +def sanitize_choices(choices, choices_all): + """Clean up a stringlist configuration attribute: keep only choices + elements present in choices_all, remove duplicate elements, expand '*' + wildcard while keeping original stringlist order. + """ + seen = set() + others = [x for x in choices_all if x not in choices] + res = [] + for s in choices: + if s in list(choices_all) + ['*']: + if not (s in seen or seen.add(s)): + res.extend(list(others) if s == '*' else [s]) + return res diff --git a/lib/beets/ui/__init__.py b/lib/beets/ui/__init__.py index 2df74ea7..8978ff54 100644 --- a/lib/beets/ui/__init__.py +++ b/lib/beets/ui/__init__.py @@ -29,6 +29,7 @@ import errno import re import struct import traceback +import os.path from beets import library from beets import plugins @@ -38,9 +39,7 @@ from beets import config from beets.util import confit from beets.autotag import mb - # On Windows platforms, use colorama to support "ANSI" terminal colors. - if sys.platform == 'win32': try: import colorama @@ -50,8 +49,10 @@ if sys.platform == 'win32': colorama.init() - -# Constants. +log = logging.getLogger('beets') +if not log.handlers: + log.addHandler(logging.StreamHandler()) +log.propagate = False # Don't propagate to root handler. PF_KEY_QUERIES = { @@ -59,19 +60,15 @@ PF_KEY_QUERIES = { 'singleton': 'singleton:true', } -# UI exception. Commands should throw this in order to display -# nonrecoverable errors to the user. + class UserError(Exception): - pass - -# Main logger. -log = logging.getLogger('beets') - + """UI exception. Commands should throw this in order to display + nonrecoverable errors to the user. + """ # Utilities. - def _encoding(): """Tries to guess the encoding used by the terminal.""" # Configured override? @@ -170,7 +167,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Infer a letter. 
for letter in option: if not letter.isalpha(): - continue # Don't use punctuation. + continue # Don't use punctuation. if letter not in letters: found_letter = letter break @@ -181,9 +178,10 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, index = option.index(found_letter) # Mark the option's shortcut letter for display. - if not require and ((default is None and not numrange and first) or - (isinstance(default, basestring) and - found_letter.lower() == default.lower())): + if not require and ( + (default is None and not numrange and first) or + (isinstance(default, basestring) and + found_letter.lower() == default.lower())): # The first option is the default; mark it. show_letter = '[%s]' % found_letter.upper() is_default = True @@ -352,11 +350,13 @@ def human_seconds_short(interval): # http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py # (pygments is by Tim Hatch, Armin Ronacher, et al.) COLOR_ESCAPE = "\x1b[" -DARK_COLORS = ["black", "darkred", "darkgreen", "brown", "darkblue", - "purple", "teal", "lightgray"] +DARK_COLORS = ["black", "darkred", "darkgreen", "brown", "darkblue", + "purple", "teal", "lightgray"] LIGHT_COLORS = ["darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", "white"] RESET_COLOR = COLOR_ESCAPE + "39;49;00m" + + def _colorize(color, text): """Returns a string that prints the given text in the given color in a terminal that is ANSI color-aware. The color must be something @@ -441,30 +441,6 @@ def colordiff(a, b, highlight='red'): return unicode(a), unicode(b) -def color_diff_suffix(a, b, highlight='red'): - """Colorize the differing suffix between two strings.""" - a, b = unicode(a), unicode(b) - if not config['color']: - return a, b - - # Fast path. - if a == b: - return a, b - - # Find the longest common prefix. - first_diff = None - for i in range(min(len(a), len(b))): - if a[i] != b[i]: - first_diff = i - break - else: - first_diff = min(len(a), len(b)) - - # Colorize from the first difference on. - return a[:first_diff] + colorize(highlight, a[first_diff:]), \ - b[:first_diff] + colorize(highlight, b[first_diff:]) - - def get_path_formats(subview=None): """Get the configuration's path formats as a list of query/template pairs. @@ -494,21 +470,6 @@ def get_replacements(): return replacements -def get_plugin_paths(): - """Get the list of search paths for plugins from the config file. - The value for "pluginpath" may be a single string or a list of - strings. - """ - pluginpaths = config['pluginpath'].get() - if isinstance(pluginpaths, basestring): - pluginpaths = [pluginpaths] - if not isinstance(pluginpaths, list): - raise confit.ConfigTypeError( - u'pluginpath must be string or a list of strings' - ) - return map(util.normpath, pluginpaths) - - def _pick_format(album, fmt=None): """Pick a format string for printing Album or Item objects, falling back to config options and defaults. @@ -558,6 +519,8 @@ def term_width(): FLOAT_EPSILON = 0.01 + + def _field_diff(field, old, new): """Given two Model objects, format their values for `field` and highlight changes among them. Return a human-readable string. If the @@ -574,13 +537,13 @@ def _field_diff(field, old, new): return None # Get formatted values for output. - oldstr = old._get_formatted(field) - newstr = new._get_formatted(field) + oldstr = old.formatted().get(field, u'') + newstr = new.formatted().get(field, u'') # For strings, highlight changes. For others, colorize the whole # thing. 
if isinstance(oldval, basestring): - oldstr, newstr = colordiff(oldval, newval) + oldstr, newstr = colordiff(oldval, newstr) else: oldstr, newstr = colorize('red', oldstr), colorize('red', newstr) @@ -613,9 +576,12 @@ def show_model_changes(new, old=None, fields=None, always=False): # New fields. for field in set(new) - set(old): + if fields and field not in fields: + continue + changes.append(u' {0}: {1}'.format( field, - colorize('red', new._get_formatted(field)) + colorize('red', new.formatted()[field]) )) # Print changes. @@ -627,10 +593,8 @@ def show_model_changes(new, old=None, fields=None, always=False): return bool(changes) - # Subcommand parsing infrastructure. - - +# # This is a fairly generic subcommand parser for optparse. It is # maintained externally here: # http://gist.github.com/462717 @@ -653,46 +617,56 @@ class Subcommand(object): self.aliases = aliases self.help = help self.hide = hide + self._root_parser = None + + def print_help(self): + self.parser.print_help() + + def parse_args(self, args): + return self.parser.parse_args(args) + + @property + def root_parser(self): + return self._root_parser + + @root_parser.setter + def root_parser(self, root_parser): + self._root_parser = root_parser + self.parser.prog = '{0} {1}'.format(root_parser.get_prog_name(), + self.name) + class SubcommandsOptionParser(optparse.OptionParser): """A variant of OptionParser that parses subcommands and their arguments. """ - # A singleton command used to give help on other subcommands. - _HelpSubcommand = Subcommand('help', optparse.OptionParser(), - help='give detailed help on a specific sub-command', - aliases=('?',)) def __init__(self, *args, **kwargs): """Create a new subcommand-aware option parser. All of the options to OptionParser.__init__ are supported in addition to subcommands, a sequence of Subcommand objects. """ - # The subcommand array, with the help command included. - self.subcommands = list(kwargs.pop('subcommands', [])) - self.subcommands.append(self._HelpSubcommand) - # A more helpful default usage. if 'usage' not in kwargs: kwargs['usage'] = """ %prog COMMAND [ARGS...] %prog help COMMAND""" + kwargs['add_help_option'] = False # Super constructor. optparse.OptionParser.__init__(self, *args, **kwargs) - # Adjust the help-visible name of each subcommand. - for subcommand in self.subcommands: - subcommand.parser.prog = '%s %s' % \ - (self.get_prog_name(), subcommand.name) - # Our root parser needs to stop on the first unrecognized argument. self.disable_interspersed_args() - def add_subcommand(self, cmd): + self.subcommands = [] + + def add_subcommand(self, *cmds): """Adds a Subcommand object to the parser's list of commands. """ - self.subcommands.append(cmd) + for cmd in cmds: + cmd.root_parser = self + self.subcommands.append(cmd) # Add the list of subcommands to the help message. 
def format_help(self, formatter=None): @@ -711,6 +685,7 @@ class SubcommandsOptionParser(optparse.OptionParser): disp_names = [] help_position = 0 subcommands = [c for c in self.subcommands if not c.hide] + subcommands.sort(key=lambda c: c.name) for subcommand in subcommands: name = subcommand.name if subcommand.aliases: @@ -756,52 +731,40 @@ class SubcommandsOptionParser(optparse.OptionParser): return subcommand return None - def parse_args(self, a=None, v=None): - """Like OptionParser.parse_args, but returns these four items: - - options: the options passed to the root parser - - subcommand: the Subcommand object that was invoked - - suboptions: the options passed to the subcommand parser - - subargs: the positional arguments passed to the subcommand + def parse_global_options(self, args): + """Parse options up to the subcommand argument. Returns a tuple + of the options object and the remaining arguments. """ - options, args = optparse.OptionParser.parse_args(self, a, v) - subcommand, suboptions, subargs = self._parse_sub(args) - return options, subcommand, suboptions, subargs + options, subargs = self.parse_args(args) - def _parse_sub(self, args): - """Given the `args` left unused by a typical OptionParser - `parse_args`, return the invoked subcommand, the subcommand - options, and the subcommand arguments. + # Force the help command + if options.help: + subargs = ['help'] + elif options.version: + subargs = ['version'] + return options, subargs + + def parse_subcommand(self, args): + """Given the `args` left unused by a `parse_global_options`, + return the invoked subcommand, the subcommand options, and the + subcommand arguments. """ + # Help is default command if not args: - # No command given. - self.print_help() - self.exit() - else: - cmdname = args.pop(0) - subcommand = self._subcommand_for_name(cmdname) - if not subcommand: - self.error('unknown command ' + cmdname) + args = ['help'] - suboptions, subargs = subcommand.parser.parse_args(args) - - if subcommand is self._HelpSubcommand: - if subargs: - # particular - cmdname = subargs[0] - helpcommand = self._subcommand_for_name(cmdname) - if not helpcommand: - self.error('no command named {0}'.format(cmdname)) - helpcommand.parser.print_help() - self.exit() - else: - # general - self.print_help() - self.exit() + cmdname = args.pop(0) + subcommand = self._subcommand_for_name(cmdname) + if not subcommand: + raise UserError("unknown command '{0}'".format(cmdname)) + suboptions, subargs = subcommand.parse_args(args) return subcommand, suboptions, subargs optparse.Option.ALWAYS_TYPED_ACTIONS += ('callback',) + + def vararg_callback(option, opt_str, value, parser): """Callback for an option with variable arguments. Manually collect arguments right of a callback-action @@ -838,53 +801,54 @@ def vararg_callback(option, opt_str, value, parser): setattr(parser.values, option.dest, value) - # The main entry point and bootstrapping. - -def _load_plugins(): +def _load_plugins(config): """Load the plugins specified in the configuration. """ - # Add plugin paths. + paths = config['pluginpath'].get(confit.StrSeq(split=False)) + paths = map(util.normpath, paths) + import beetsplug - beetsplug.__path__ = get_plugin_paths() + beetsplug.__path__ - + beetsplug.__path__ = paths + beetsplug.__path__ # For backwards compatibility. - sys.path += get_plugin_paths() + sys.path += paths - # Load requested plugins. 
plugins.load_plugins(config['plugins'].as_str_seq()) plugins.send("pluginload") + return plugins -def _configure(args): - """Parse the command line, load configuration files (including - loading any indicated plugins), and return the invoked subcomand, - the subcommand options, and the subcommand arguments. +def _setup(options, lib=None): + """Prepare and global state and updates it with command line options. + + Returns a list of subcommands, a list of plugins, and a library instance. """ - # Temporary: Migrate from 1.0-style configuration. - from beets.ui import migrate - migrate.automigrate() + # Configure the MusicBrainz API. + mb.configure() + + config = _configure(options) + + plugins = _load_plugins(config) # Get the default subcommands. from beets.ui.commands import default_commands - # Construct the root parser. - commands = list(default_commands) - commands.append(migrate.migrate_cmd) # Temporary. - parser = SubcommandsOptionParser(subcommands=commands) - parser.add_option('-l', '--library', dest='library', - help='library database file to use') - parser.add_option('-d', '--directory', dest='directory', - help="destination music directory") - parser.add_option('-v', '--verbose', dest='verbose', action='store_true', - help='print debugging information') - parser.add_option('-c', '--config', dest='config', - help='path to configuration file') + subcommands = list(default_commands) + subcommands.extend(plugins.commands()) - # Parse the command-line! - options, args = optparse.OptionParser.parse_args(parser, args) + if lib is None: + lib = _open_library(config) + plugins.send("library_opened", lib=lib) + library.Item._types = plugins.types(library.Item) + library.Album._types = plugins.types(library.Album) + return subcommands, plugins, lib + + +def _configure(options): + """Amend the global configuration object with command line options. + """ # Add any additional config files specified with --config. This # special handling lets specified plugins get loaded before we # finish parsing the command line. @@ -894,22 +858,28 @@ def _configure(args): config.set_file(config_path) config.set_args(options) - # Now add the plugin commands to the parser. - _load_plugins() - for cmd in plugins.commands(): - parser.add_subcommand(cmd) + # Configure the logger. + if config['verbose'].get(bool): + log.setLevel(logging.DEBUG) + else: + log.setLevel(logging.INFO) - # Parse the remainder of the command line with loaded plugins. - return parser._parse_sub(args) + config_path = config.user_config_path() + if os.path.isfile(config_path): + log.debug(u'user configuration: {0}'.format( + util.displayable_path(config_path))) + else: + log.debug(u'no user configuration found at {0}'.format( + util.displayable_path(config_path))) + + log.debug(u'data directory: {0}' + .format(util.displayable_path(config.config_dir()))) + return config -def _raw_main(args): - """A helper function for `main` without top-level exception - handling. +def _open_library(config): + """Create a new library instance from the configuration. """ - subcommand, suboptions, subargs = _configure(args) - - # Open library file. dbpath = config['library'].as_filename() try: lib = library.Library( @@ -918,32 +888,52 @@ def _raw_main(args): get_path_formats(), get_replacements(), ) - except sqlite3.OperationalError: + lib.get_item(0) # Test database connection. 
+ except (sqlite3.OperationalError, sqlite3.DatabaseError): + log.debug(traceback.format_exc()) raise UserError(u"database file {0} could not be opened".format( util.displayable_path(dbpath) )) - plugins.send("library_opened", lib=lib) + log.debug(u'library database: {0}\n' + u'library directory: {1}' + .format(util.displayable_path(lib.path), + util.displayable_path(lib.directory))) + return lib - # Configure the logger. - if config['verbose'].get(bool): - log.setLevel(logging.DEBUG) - else: - log.setLevel(logging.INFO) - log.debug(u'data directory: {0}\n' - u'library database: {1}\n' - u'library directory: {2}' - .format( - util.displayable_path(config.config_dir()), - util.displayable_path(lib.path), - util.displayable_path(lib.directory), - ) - ) - # Configure the MusicBrainz API. - mb.configure() +def _raw_main(args, lib=None): + """A helper function for `main` without top-level exception + handling. + """ + parser = SubcommandsOptionParser() + parser.add_option('-l', '--library', dest='library', + help='library database file to use') + parser.add_option('-d', '--directory', dest='directory', + help="destination music directory") + parser.add_option('-v', '--verbose', dest='verbose', action='store_true', + help='print debugging information') + parser.add_option('-c', '--config', dest='config', + help='path to configuration file') + parser.add_option('-h', '--help', dest='help', action='store_true', + help='how this help message and exit') + parser.add_option('--version', dest='version', action='store_true', + help=optparse.SUPPRESS_HELP) - # Invoke the subcommand. + options, subargs = parser.parse_global_options(args) + + # Special case for the `config --edit` command: bypass _setup so + # that an invalid configuration does not prevent the editor from + # starting. + if subargs[0] == 'config' and ('-e' in subargs or '--edit' in subargs): + from beets.ui.commands import config_edit + return config_edit() + + subcommands, plugins, lib = _setup(options, lib) + parser.add_subcommand(*subcommands) + + subcommand, suboptions, subargs = parser.parse_subcommand(subargs) subcommand.func(lib, suboptions, subargs) + plugins.send('cli_exit', lib=lib) diff --git a/lib/beets/ui/commands.py b/lib/beets/ui/commands.py index 3d724f60..4dfac11c 100644 --- a/lib/beets/ui/commands.py +++ b/lib/beets/ui/commands.py @@ -20,15 +20,16 @@ from __future__ import print_function import logging import os import time -import itertools import codecs import platform +import re +import shlex import beets from beets import ui from beets.ui import print_, input_, decargs from beets import autotag -from beets.autotag import recommendation +from beets.autotag import Recommendation from beets.autotag import hooks from beets import plugins from beets import importer @@ -39,6 +40,8 @@ from beets import library from beets import config from beets.util.confit import _package_path +VARIOUS_ARTISTS = u'Various Artists' + # Global logger. log = logging.getLogger('beets') @@ -47,10 +50,8 @@ log = logging.getLogger('beets') default_commands = [] - # Utilities. - def _do_query(lib, query, album, also_items=True): """For commands that operate on matched items, performs a query and returns a list of matching items and a list of matching @@ -79,9 +80,6 @@ def _do_query(lib, query, album, also_items=True): # fields: Shows a list of available fields for queries and format strings. 
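Note: the reworked bootstrap above splits option parsing into two stages: the root parser stops at the first positional argument (the subcommand name), and everything after it is handed to that subcommand's own parser. A stripped-down sketch of the same control flow using plain ``optparse``, with toy option names and no beets objects involved:

    import optparse

    root = optparse.OptionParser(add_help_option=False)
    root.add_option('-v', '--verbose', action='store_true')
    root.disable_interspersed_args()  # stop at the first positional argument

    list_parser = optparse.OptionParser(prog='beet list')
    list_parser.add_option('-a', '--album', action='store_true')

    options, rest = root.parse_args(['-v', 'list', '-a', 'beatles'])
    # options.verbose == True, rest == ['list', '-a', 'beatles']
    cmdname, subargs = rest[0], rest[1:]
    suboptions, positional = list_parser.parse_args(subargs)
    # cmdname == 'list', suboptions.album == True, positional == ['beatles']

Turning off the built-in ``-h`` handling on the root parser is what allows ``--help`` to be rerouted to the new ``help`` subcommand instead of optparse's default behavior.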
-fields_cmd = ui.Subcommand('fields', - help='show fields available for queries and format strings') - def fields_func(lib, opts, args): def _print_rows(names): print(" " + "\n ".join(names)) @@ -99,20 +97,47 @@ def fields_func(lib, opts, args): _print_rows(plugin_fields) print("Item fields:") - _print_rows(library.ITEM_KEYS) + _print_rows(library.Item._fields.keys()) _show_plugin_fields(False) print("\nAlbum fields:") - _print_rows(library.ALBUM_KEYS) + _print_rows(library.Album._fields.keys()) _show_plugin_fields(True) + +fields_cmd = ui.Subcommand( + 'fields', + help='show fields available for queries and format strings' +) fields_cmd.func = fields_func default_commands.append(fields_cmd) -# import: Autotagger and importer. +# help: Print help text for commands -VARIOUS_ARTISTS = u'Various Artists' +class HelpCommand(ui.Subcommand): + + def __init__(self): + super(HelpCommand, self).__init__( + 'help', aliases=('?',), + help='give detailed help on a specific sub-command', + ) + + def func(self, lib, opts, args): + if args: + cmdname = args[0] + helpcommand = self.root_parser._subcommand_for_name(cmdname) + if not helpcommand: + raise ui.UserError("unknown command '{0}'".format(cmdname)) + helpcommand.print_help() + else: + self.root_parser.print_help() + + +default_commands.append(HelpCommand()) + + +# import: Autotagger and importer. # Importer utilities and support. @@ -145,6 +170,7 @@ def disambig_string(info): if disambig: return u', '.join(disambig) + def dist_string(dist): """Formats a distance (a float) as a colorized similarity percentage string. @@ -158,6 +184,7 @@ def dist_string(dist): out = ui.colorize('red', out) return out + def penalty_string(distance, limit=None): """Returns a colorized string that indicates all the penalties applied to a distance object. @@ -173,6 +200,7 @@ def penalty_string(distance, limit=None): penalties = penalties[:limit] + ['...'] return ui.colorize('yellow', '(%s)' % ', '.join(penalties)) + def show_change(cur_artist, cur_album, match): """Print out a representation of the changes that will be made if an album's tags are changed according to `match`, which must be an AlbumMatch @@ -213,13 +241,13 @@ def show_change(cur_artist, cur_album, match): (cur_album != match.info.album and match.info.album != VARIOUS_ARTISTS): artist_l, artist_r = cur_artist or '', match.info.artist - album_l, album_r = cur_album or '', match.info.album + album_l, album_r = cur_album or '', match.info.album if artist_r == VARIOUS_ARTISTS: # Hide artists for VA releases. artist_l, artist_r = u'', u'' artist_l, artist_r = ui.colordiff(artist_l, artist_r) - album_l, album_r = ui.colordiff(album_l, album_r) + album_l, album_r = ui.colordiff(album_l, album_r) print_("Correcting tags from:") show_album(artist_l, album_l) @@ -291,17 +319,9 @@ def show_change(cur_artist, cur_album, match): color = 'lightgray' else: color = 'red' - if (cur_track + new_track).count('-') == 1: - lhs_track, rhs_track = ui.colorize(color, cur_track), \ - ui.colorize(color, new_track) - else: - color = 'red' - lhs_track, rhs_track = ui.color_diff_suffix(cur_track, - new_track) - templ = ui.colorize(color, u' (#') + u'{0}' + \ - ui.colorize(color, u')') - lhs += templ.format(lhs_track) - rhs += templ.format(rhs_track) + templ = ui.colorize(color, u' (#{0})') + lhs += templ.format(cur_track) + rhs += templ.format(new_track) lhs_width += len(cur_track) + 4 # Length change. 
@@ -310,12 +330,9 @@ def show_change(cur_artist, cur_album, match): config['ui']['length_diff_thresh'].as_number(): cur_length = ui.human_seconds_short(item.length) new_length = ui.human_seconds_short(track_info.length) - lhs_length, rhs_length = ui.color_diff_suffix(cur_length, - new_length) - templ = ui.colorize('red', u' (') + u'{0}' + \ - ui.colorize('red', u')') - lhs += templ.format(lhs_length) - rhs += templ.format(rhs_length) + templ = ui.colorize('red', u' ({0})') + lhs += templ.format(cur_length) + rhs += templ.format(new_length) lhs_width += len(cur_length) + 3 # Penalties. @@ -357,6 +374,7 @@ def show_change(cur_artist, cur_album, match): line += ' (%s)' % ui.human_seconds_short(item.length) print_(ui.colorize('yellow', line)) + def show_item_change(item, match): """Print out the change that would occur by tagging `item` with the metadata from `match`, a TrackMatch object. @@ -394,6 +412,38 @@ def show_item_change(item, match): info.append(ui.colorize('lightgray', '(%s)' % disambig)) print_(' '.join(info)) + +def summarize_items(items, singleton): + """Produces a brief summary line describing a set of items. Used for + manually resolving duplicates during import. + + `items` is a list of `Item` objects. `singleton` indicates whether + this is an album or single-item import (if the latter, them `items` + should only have one element). + """ + summary_parts = [] + if not singleton: + summary_parts.append("{0} items".format(len(items))) + + format_counts = {} + for item in items: + format_counts[item.format] = format_counts.get(item.format, 0) + 1 + if len(format_counts) == 1: + # A single format. + summary_parts.append(items[0].format) + else: + # Enumerate all the formats. + for format, count in format_counts.iteritems(): + summary_parts.append('{0} {1}'.format(format, count)) + + average_bitrate = sum([item.bitrate for item in items]) / len(items) + total_duration = sum([item.length for item in items]) + summary_parts.append('{0}kbps'.format(int(average_bitrate / 1000))) + summary_parts.append(ui.human_seconds_short(total_duration)) + + return ', '.join(summary_parts) + + def _summary_judment(rec): """Determines whether a decision should be made without even asking the user. This occurs in quiet mode and when an action is chosen for @@ -402,7 +452,7 @@ def _summary_judment(rec): made. """ if config['import']['quiet']: - if rec == recommendation.strong: + if rec == Recommendation.strong: return importer.action.APPLY else: action = config['import']['quiet_fallback'].as_choice({ @@ -410,7 +460,7 @@ def _summary_judment(rec): 'asis': importer.action.ASIS, }) - elif rec == recommendation.none: + elif rec == Recommendation.none: action = config['import']['none_rec_action'].as_choice({ 'skip': importer.action.SKIP, 'asis': importer.action.ASIS, @@ -426,6 +476,7 @@ def _summary_judment(rec): print_('Importing as-is.') return action + def choose_candidate(candidates, singleton, rec, cur_artist=None, cur_album=None, item=None, itemcount=None): """Given a sorted list of candidates, ask the user for a selection @@ -479,13 +530,13 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Is the change good enough? bypass_candidates = False - if rec != recommendation.none: + if rec != Recommendation.none: match = candidates[0] bypass_candidates = True while True: # Display and choose from candidates. - require = rec <= recommendation.low + require = rec <= Recommendation.low if not bypass_candidates: # Display list of candidates. 
@@ -559,7 +610,7 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, show_change(cur_artist, cur_album, match) # Exact match => tag automatically if we're not in timid mode. - if rec == recommendation.strong and not config['import']['timid']: + if rec == Recommendation.strong and not config['import']['timid']: return match # Ask for confirmation. @@ -597,6 +648,7 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, elif sel == 'i': return importer.action.MANUAL_ID + def manual_search(singleton): """Input either an artist and album (for full albums) or artist and track name (for singletons) for manual search. @@ -605,12 +657,14 @@ def manual_search(singleton): name = input_('Track:' if singleton else 'Album:') return artist.strip(), name.strip() + def manual_id(singleton): """Input an ID, either for an album ("release") or a track ("recording"). """ prompt = u'Enter {0} ID:'.format('recording' if singleton else 'release') return input_(prompt).strip() + class TerminalImportSession(importer.ImportSession): """An import session that runs in a terminal. """ @@ -637,12 +691,14 @@ class TerminalImportSession(importer.ImportSession): candidates, rec = task.candidates, task.rec while True: # Ask for a choice from the user. - choice = choose_candidate(candidates, False, rec, task.cur_artist, - task.cur_album, itemcount=len(task.items)) + choice = choose_candidate( + candidates, False, rec, task.cur_artist, task.cur_album, + itemcount=len(task.items) + ) # Choose which tags to use. if choice in (importer.action.SKIP, importer.action.ASIS, - importer.action.TRACKS, importer.action.ALBUMS): + importer.action.TRACKS, importer.action.ALBUMS): # Pass selection to main control flow. return choice elif choice is importer.action.MANUAL: @@ -688,35 +744,47 @@ class TerminalImportSession(importer.ImportSession): if choice in (importer.action.SKIP, importer.action.ASIS): return choice elif choice == importer.action.TRACKS: - assert False # TRACKS is only legal for albums. + assert False # TRACKS is only legal for albums. elif choice == importer.action.MANUAL: # Continue in the loop with a new set of candidates. search_artist, search_title = manual_search(True) candidates, rec = autotag.tag_item(task.item, search_artist, - search_title) + search_title) elif choice == importer.action.MANUAL_ID: # Ask for a track ID. search_id = manual_id(True) if search_id: candidates, rec = autotag.tag_item(task.item, - search_id=search_id) + search_id=search_id) else: # Chose a candidate. assert isinstance(choice, autotag.TrackMatch) return choice - def resolve_duplicate(self, task): + def resolve_duplicate(self, task, found_duplicates): """Decide what to do when a new album or item seems similar to one that's already in the library. """ - log.warn("This %s is already in the library!" % - ("album" if task.is_album else "item")) + log.warn(u"This {0} is already in the library!" + .format("album" if task.is_album else "item")) if config['import']['quiet']: # In quiet mode, don't prompt -- just skip. - log.info('Skipping.') + log.info(u'Skipping.') sel = 's' else: + # Print some detail about the existing and new items so the + # user can make an informed decision. 
+ for duplicate in found_duplicates: + print("Old: " + summarize_items( + list(duplicate.items()) if task.is_album else [duplicate], + not task.is_album, + )) + print("New: " + summarize_items( + task.imported_items(), + not task.is_album, + )) + sel = ui.input_options( ('Skip new', 'Keep both', 'Remove old') ) @@ -729,7 +797,7 @@ class TerminalImportSession(importer.ImportSession): pass elif sel == 'r': # Remove old. - task.remove_duplicates = True + task.should_remove_duplicates = True else: assert False @@ -740,18 +808,15 @@ class TerminalImportSession(importer.ImportSession): # The import command. + def import_files(lib, paths, query): """Import the files in the given list of paths or matching the query. """ # Check the user-specified directories. for path in paths: - fullpath = syspath(normpath(path)) - if not config['import']['singletons'] and not os.path.isdir(fullpath): - raise ui.UserError(u'not a directory: {0}'.format( - displayable_path(path))) - elif config['import']['singletons'] and not os.path.exists(fullpath): - raise ui.UserError(u'no such file: {0}'.format( + if not os.path.exists(syspath(normpath(path))): + raise ui.UserError(u'no such file or directory: {0}'.format( displayable_path(path))) # Check parameter consistency. @@ -787,43 +852,7 @@ def import_files(lib, paths, query): # Emit event. plugins.send('import', lib=lib, paths=paths) -import_cmd = ui.Subcommand('import', help='import new music', - aliases=('imp', 'im')) -import_cmd.parser.add_option('-c', '--copy', action='store_true', - default=None, help="copy tracks into library directory (default)") -import_cmd.parser.add_option('-C', '--nocopy', action='store_false', - dest='copy', help="don't copy tracks (opposite of -c)") -import_cmd.parser.add_option('-w', '--write', action='store_true', - default=None, help="write new metadata to files' tags (default)") -import_cmd.parser.add_option('-W', '--nowrite', action='store_false', - dest='write', help="don't write metadata (opposite of -w)") -import_cmd.parser.add_option('-a', '--autotag', action='store_true', - dest='autotag', help="infer tags for imported files (default)") -import_cmd.parser.add_option('-A', '--noautotag', action='store_false', - dest='autotag', - help="don't infer tags for imported files (opposite of -a)") -import_cmd.parser.add_option('-p', '--resume', action='store_true', - default=None, help="resume importing if interrupted") -import_cmd.parser.add_option('-P', '--noresume', action='store_false', - dest='resume', help="do not try to resume importing") -import_cmd.parser.add_option('-q', '--quiet', action='store_true', - dest='quiet', help="never prompt for input: skip albums instead") -import_cmd.parser.add_option('-l', '--log', dest='log', - help='file to log untaggable albums for later review') -import_cmd.parser.add_option('-s', '--singletons', action='store_true', - help='import individual tracks instead of full albums') -import_cmd.parser.add_option('-t', '--timid', dest='timid', - action='store_true', help='always confirm all actions') -import_cmd.parser.add_option('-L', '--library', dest='library', - action='store_true', help='retag items matching a query') -import_cmd.parser.add_option('-i', '--incremental', dest='incremental', - action='store_true', help='skip already-imported directories') -import_cmd.parser.add_option('-I', '--noincremental', dest='incremental', - action='store_false', help='do not skip already-imported directories') -import_cmd.parser.add_option('--flat', dest='flat', - action='store_true', help='import an entire 
tree as a single album') -import_cmd.parser.add_option('-g', '--group-albums', dest='group_albums', - action='store_true', help='group tracks in a folder into seperate albums') + def import_func(lib, opts, args): config['import'].set_args(opts) @@ -842,6 +871,83 @@ def import_func(lib, opts, args): raise ui.UserError('no path specified') import_files(lib, paths, query) + + +import_cmd = ui.Subcommand( + 'import', help='import new music', aliases=('imp', 'im') +) +import_cmd.parser.add_option( + '-c', '--copy', action='store_true', default=None, + help="copy tracks into library directory (default)" +) +import_cmd.parser.add_option( + '-C', '--nocopy', action='store_false', dest='copy', + help="don't copy tracks (opposite of -c)" +) +import_cmd.parser.add_option( + '-w', '--write', action='store_true', default=None, + help="write new metadata to files' tags (default)" +) +import_cmd.parser.add_option( + '-W', '--nowrite', action='store_false', dest='write', + help="don't write metadata (opposite of -w)" +) +import_cmd.parser.add_option( + '-a', '--autotag', action='store_true', dest='autotag', + help="infer tags for imported files (default)" +) +import_cmd.parser.add_option( + '-A', '--noautotag', action='store_false', dest='autotag', + help="don't infer tags for imported files (opposite of -a)" +) +import_cmd.parser.add_option( + '-p', '--resume', action='store_true', default=None, + help="resume importing if interrupted" +) +import_cmd.parser.add_option( + '-P', '--noresume', action='store_false', dest='resume', + help="do not try to resume importing" +) +import_cmd.parser.add_option( + '-q', '--quiet', action='store_true', dest='quiet', + help="never prompt for input: skip albums instead" +) +import_cmd.parser.add_option( + '-l', '--log', dest='log', + help='file to log untaggable albums for later review' +) +import_cmd.parser.add_option( + '-s', '--singletons', action='store_true', + help='import individual tracks instead of full albums' +) +import_cmd.parser.add_option( + '-t', '--timid', dest='timid', action='store_true', + help='always confirm all actions' +) +import_cmd.parser.add_option( + '-L', '--library', dest='library', action='store_true', + help='retag items matching a query' +) +import_cmd.parser.add_option( + '-i', '--incremental', dest='incremental', action='store_true', + help='skip already-imported directories' +) +import_cmd.parser.add_option( + '-I', '--noincremental', dest='incremental', action='store_false', + help='do not skip already-imported directories' +) +import_cmd.parser.add_option( + '--flat', dest='flat', action='store_true', + help='import an entire tree as a single album' +) +import_cmd.parser.add_option( + '-g', '--group-albums', dest='group_albums', action='store_true', + help='group tracks in a folder into separate albums' +) +import_cmd.parser.add_option( + '--pretend', dest='pretend', action='store_true', + help='just print the files to import' +) import_cmd.func = import_func default_commands.append(import_cmd) @@ -860,19 +966,28 @@ def list_items(lib, query, album, fmt): for item in lib.items(query): ui.print_obj(item, lib, tmpl) -list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) -list_cmd.parser.add_option('-a', '--album', action='store_true', - help='show matching albums instead of tracks') -list_cmd.parser.add_option('-p', '--path', action='store_true', - help='print paths for matched items or albums') -list_cmd.parser.add_option('-f', '--format', action='store', - help='print with custom format', default=None) + def 
list_func(lib, opts, args): if opts.path: fmt = '$path' else: fmt = opts.format list_items(lib, decargs(args), opts.album, fmt) + + +list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) +list_cmd.parser.add_option( + '-a', '--album', action='store_true', + help='show matching albums instead of tracks' +) +list_cmd.parser.add_option( + '-p', '--path', action='store_true', + help='print paths for matched items or albums' +) +list_cmd.parser.add_option( + '-f', '--format', action='store', + help='print with custom format', default=None +) list_cmd.func = list_func default_commands.append(list_cmd) @@ -900,14 +1015,14 @@ def update_items(lib, query, album, move, pretend): # Did the item change since last checked? if item.current_mtime() <= item.mtime: - log.debug(u'skipping %s because mtime is up to date (%i)' % - (displayable_path(item.path), item.mtime)) + log.debug(u'skipping {0} because mtime is up to date ({1})' + .format(displayable_path(item.path), item.mtime)) continue # Read new data. try: item.read() - except Exception as exc: + except library.ReadError as exc: log.error(u'error reading {0}: {1}'.format( displayable_path(item.path), exc)) continue @@ -923,7 +1038,7 @@ def update_items(lib, query, album, move, pretend): # Check for and display changes. changed = ui.show_model_changes(item, - fields=library.ITEM_KEYS_META) + fields=library.Item._media_fields) # Save changes. if not pretend: @@ -951,32 +1066,44 @@ def update_items(lib, query, album, move, pretend): continue album = lib.get_album(album_id) if not album: # Empty albums have already been removed. - log.debug('emptied album %i' % album_id) + log.debug(u'emptied album {0}'.format(album_id)) continue first_item = album.items().get() # Update album structure to reflect an item in it. - for key in library.ALBUM_KEYS_ITEM: + for key in library.Album.item_keys: album[key] = first_item[key] album.store() # Move album art (and any inconsistent items). if move and lib.directory in ancestry(first_item.path): - log.debug('moving album %i' % album_id) + log.debug(u'moving album {0}'.format(album_id)) album.move() -update_cmd = ui.Subcommand('update', - help='update the library', aliases=('upd','up',)) -update_cmd.parser.add_option('-a', '--album', action='store_true', - help='match albums instead of tracks') -update_cmd.parser.add_option('-M', '--nomove', action='store_false', - default=True, dest='move', help="don't move files in library") -update_cmd.parser.add_option('-p', '--pretend', action='store_true', - help="show all changes but do nothing") -update_cmd.parser.add_option('-f', '--format', action='store', - help='print with custom format', default=None) + def update_func(lib, opts, args): update_items(lib, decargs(args), opts.album, opts.move, opts.pretend) + + +update_cmd = ui.Subcommand( + 'update', help='update the library', aliases=('upd', 'up',) +) +update_cmd.parser.add_option( + '-a', '--album', action='store_true', + help='match albums instead of tracks' +) +update_cmd.parser.add_option( + '-M', '--nomove', action='store_false', default=True, dest='move', + help="don't move files in library" +) +update_cmd.parser.add_option( + '-p', '--pretend', action='store_true', + help="show all changes but do nothing" +) +update_cmd.parser.add_option( + '-f', '--format', action='store', + help='print with custom format', default=None +) update_cmd.func = update_func default_commands.append(update_cmd) @@ -984,23 +1111,27 @@ default_commands.append(update_cmd) # remove: Remove items from library, delete files. 
def remove_items(lib, query, album, delete): - """Remove items matching query from If album, then match and + """Remove items matching query from lib. If album, then match and remove whole albums. If delete, also remove files from disk. """ # Get the matching items. items, albums = _do_query(lib, query, album) - # Show all the items. - for item in items: - ui.print_obj(item, lib) - - # Confirm with user. + # Prepare confirmation with user. print_() if delete: + fmt = u'$path - $title' prompt = 'Really DELETE %i files (y/n)?' % len(items) else: + fmt = None prompt = 'Really remove %i items from the library (y/n)?' % \ len(items) + + # Show all the items. + for item in items: + ui.print_obj(item, lib, fmt) + + # Confirm with user. if not ui.input_yn(prompt, True): return @@ -1009,14 +1140,22 @@ def remove_items(lib, query, album, delete): for obj in (albums if album else items): obj.remove(delete) -remove_cmd = ui.Subcommand('remove', - help='remove matching items from the library', aliases=('rm',)) -remove_cmd.parser.add_option("-d", "--delete", action="store_true", - help="also remove files from disk") -remove_cmd.parser.add_option('-a', '--album', action='store_true', - help='match albums instead of tracks') + def remove_func(lib, opts, args): remove_items(lib, decargs(args), opts.album, opts.delete) + + +remove_cmd = ui.Subcommand( + 'remove', help='remove matching items from the library', aliases=('rm',) +) +remove_cmd.parser.add_option( + "-d", "--delete", action="store_true", + help="also remove files from disk" +) +remove_cmd.parser.add_option( + '-a', '--album', action='store_true', + help='match albums instead of tracks' +) remove_cmd.func = remove_func default_commands.append(remove_cmd) @@ -1032,6 +1171,7 @@ def show_stats(lib, query, exact): total_items = 0 artists = set() albums = set() + album_artists = set() for item in items: if exact: @@ -1041,25 +1181,42 @@ def show_stats(lib, query, exact): total_time += item.length total_items += 1 artists.add(item.artist) - albums.add(item.album) + album_artists.add(item.albumartist) + if item.album_id: + albums.add(item.album_id) size_str = '' + ui.human_bytes(total_size) if exact: size_str += ' ({0} bytes)'.format(total_size) print_("""Tracks: {0} -Total time: {1} ({2:.2f} seconds) -Total size: {3} -Artists: {4} -Albums: {5}""".format(total_items, ui.human_seconds(total_time), total_time, - size_str, len(artists), len(albums))) +Total time: {1}{2} +{3}: {4} +Artists: {5} +Albums: {6} +Album artists: {7}""".format( + total_items, + ui.human_seconds(total_time), + ' ({0:.2f} seconds)'.format(total_time) if exact else '', + 'Total size' if exact else 'Approximate total size', + size_str, + len(artists), + len(albums), + len(album_artists)), + ) + -stats_cmd = ui.Subcommand('stats', - help='show statistics about the library or a query') -stats_cmd.parser.add_option('-e', '--exact', action='store_true', - help='get exact file sizes') def stats_func(lib, opts, args): show_stats(lib, decargs(args), opts.exact) + + +stats_cmd = ui.Subcommand( + 'stats', help='show statistics about the library or a query' +) +stats_cmd.parser.add_option( + '-e', '--exact', action='store_true', + help='exact size and time' +) stats_cmd.func = stats_func default_commands.append(stats_cmd) @@ -1074,8 +1231,11 @@ def show_version(lib, opts, args): print_('plugins:', ', '.join(names)) else: print_('no plugins loaded') -version_cmd = ui.Subcommand('version', - help='output version information') + + +version_cmd = ui.Subcommand( + 'version', help='output version 
information' +) version_cmd.func = show_version default_commands.append(version_cmd) @@ -1083,13 +1243,17 @@ default_commands.append(version_cmd) # modify: Declaratively change metadata. def modify_items(lib, mods, dels, query, write, move, album, confirm): - """Modifies matching items according to key=value assignments.""" + """Modifies matching items according to user-specified assignments and + deletions. + + `mods` is a dictionary of field and value pairse indicating + assignments. `dels` is a list of fields to be deleted. + """ # Parse key=value specifications into a dictionary. model_cls = library.Album if album else library.Item - fsets = {} - for mod in mods: - key, value = mod.split('=', 1) - fsets[key] = model_cls._parse(key, value) + + for key, value in mods.items(): + mods[key] = model_cls._parse(key, value) # Get the items to modify. items, albums = _do_query(lib, query, album, False) @@ -1097,13 +1261,16 @@ def modify_items(lib, mods, dels, query, write, move, album, confirm): # Apply changes *temporarily*, preview them, and collect modified # objects. - print_('Modifying %i %ss.' % (len(objs), 'album' if album else 'item')) + print_('Modifying {0} {1}s.' + .format(len(objs), 'album' if album else 'item')) changed = set() for obj in objs: - for field, value in fsets.iteritems(): - obj[field] = value + obj.update(mods) for field in dels: - del obj[field] + try: + del obj[field] + except KeyError: + pass if ui.show_model_changes(obj): changed.add(obj) @@ -1114,65 +1281,87 @@ def modify_items(lib, mods, dels, query, write, move, album, confirm): # Confirm action. if confirm: - extra = ' and write tags' if write else '' + if write and move: + extra = ', move and write tags' + elif write: + extra = ' and write tags' + elif move: + extra = ' and move' + else: + extra = '' + if not ui.input_yn('Really modify%s (Y/n)?' % extra): return - # Apply changes to database. + # Apply changes to database and files with lib.transaction(): for obj in changed: if move: cur_path = obj.path - if lib.directory in ancestry(cur_path): # In library? - log.debug('moving object %s' % cur_path) + if lib.directory in ancestry(cur_path): # In library? + log.debug(u'moving object {0}' + .format(displayable_path(cur_path))) obj.move() - obj.store() + obj.try_sync(write) - # Apply tags if requested. 
- if write: - if album: - changed_items = itertools.chain(*(a.items() for a in changed)) - else: - changed_items = changed - for item in changed_items: - try: - item.write() - except library.FileOperationError as exc: - log.error(exc) -modify_cmd = ui.Subcommand('modify', - help='change metadata fields', aliases=('mod',)) -modify_cmd.parser.add_option('-M', '--nomove', action='store_false', - default=True, dest='move', help="don't move files in library") -modify_cmd.parser.add_option('-w', '--write', action='store_true', - default=None, help="write new metadata to files' tags (default)") -modify_cmd.parser.add_option('-W', '--nowrite', action='store_false', - dest='write', help="don't write metadata (opposite of -w)") -modify_cmd.parser.add_option('-a', '--album', action='store_true', - help='modify whole albums instead of tracks') -modify_cmd.parser.add_option('-y', '--yes', action='store_true', - help='skip confirmation') -modify_cmd.parser.add_option('-f', '--format', action='store', - help='print with custom format', default=None) -def modify_func(lib, opts, args): - args = decargs(args) - mods = [] +def modify_parse_args(args): + """Split the arguments for the modify subcommand into query parts, + assignments (field=value), and deletions (field!). Returns the result as + a three-tuple in that order. + """ + mods = {} dels = [] query = [] for arg in args: if arg.endswith('!') and '=' not in arg and ':' not in arg: - dels.append(arg[:-1]) - elif '=' in arg: - mods.append(arg) + dels.append(arg[:-1]) # Strip trailing !. + elif '=' in arg and ':' not in arg.split('=', 1)[0]: + key, val = arg.split('=', 1) + mods[key] = val else: query.append(arg) + return query, mods, dels + + +def modify_func(lib, opts, args): + query, mods, dels = modify_parse_args(decargs(args)) if not mods and not dels: raise ui.UserError('no modifications specified') write = opts.write if opts.write is not None else \ config['import']['write'].get(bool) modify_items(lib, mods, dels, query, write, opts.move, opts.album, not opts.yes) + + +modify_cmd = ui.Subcommand( + 'modify', help='change metadata fields', aliases=('mod',) +) +modify_cmd.parser.add_option( + '-M', '--nomove', action='store_false', default=True, dest='move', + help="don't move files in library" +) +modify_cmd.parser.add_option( + '-w', '--write', action='store_true', default=None, + help="write new metadata to files' tags (default)" +) +modify_cmd.parser.add_option( + '-W', '--nowrite', action='store_false', dest='write', + help="don't write metadata (opposite of -w)" +) +modify_cmd.parser.add_option( + '-a', '--album', action='store_true', + help='modify whole albums instead of tracks' +) +modify_cmd.parser.add_option( + '-y', '--yes', action='store_true', + help='skip confirmation' +) +modify_cmd.parser.add_option( + '-f', '--format', action='store', + help='print with custom format', default=None +) modify_cmd.func = modify_func default_commands.append(modify_cmd) @@ -1189,21 +1378,14 @@ def move_items(lib, dest, query, copy, album): action = 'Copying' if copy else 'Moving' entity = 'album' if album else 'item' - log.info('%s %i %ss.' 
% (action, len(objs), entity)) + log.info(u'{0} {1} {2}s.'.format(action, len(objs), entity)) for obj in objs: - log.debug('moving: %s' % obj.path) + log.debug(u'moving: {0}'.format(util.displayable_path(obj.path))) obj.move(copy, basedir=dest) obj.store() -move_cmd = ui.Subcommand('move', - help='move or copy items', aliases=('mv',)) -move_cmd.parser.add_option('-d', '--dest', metavar='DIR', dest='dest', - help='destination directory') -move_cmd.parser.add_option('-c', '--copy', default=False, action='store_true', - help='copy instead of moving') -move_cmd.parser.add_option('-a', '--album', default=False, action='store_true', - help='match whole albums instead of tracks') + def move_func(lib, opts, args): dest = opts.dest if dest is not None: @@ -1212,13 +1394,30 @@ def move_func(lib, opts, args): raise ui.UserError('no such directory: %s' % dest) move_items(lib, dest, decargs(args), opts.copy, opts.album) + + +move_cmd = ui.Subcommand( + 'move', help='move or copy items', aliases=('mv',) +) +move_cmd.parser.add_option( + '-d', '--dest', metavar='DIR', dest='dest', + help='destination directory' +) +move_cmd.parser.add_option( + '-c', '--copy', default=False, action='store_true', + help='copy instead of moving' +) +move_cmd.parser.add_option( + '-a', '--album', default=False, action='store_true', + help='match whole albums instead of tracks' +) move_cmd.func = move_func default_commands.append(move_cmd) # write: Write tags into files. -def write_items(lib, query, pretend): +def write_items(lib, query, pretend, force): """Write tag information from the database to the respective files in the filesystem. """ @@ -1235,7 +1434,7 @@ def write_items(lib, query, pretend): # Get an Item object reflecting the "clean" (on-disk) state. try: clean_item = library.Item.from_path(item.path) - except Exception as exc: + except library.ReadError as exc: log.error(u'error reading {0}: {1}'.format( displayable_path(item.path), exc )) @@ -1243,32 +1442,30 @@ def write_items(lib, query, pretend): # Check for and display changes. changed = ui.show_model_changes(item, clean_item, - library.ITEM_KEYS_WRITABLE, always=True) - if changed and not pretend: - try: - item.write() - except library.FileOperationError as exc: - log.error(exc) + library.Item._media_fields, force) + if (changed or force) and not pretend: + item.try_sync() + + +def write_func(lib, opts, args): + write_items(lib, decargs(args), opts.pretend, opts.force) + write_cmd = ui.Subcommand('write', help='write tag information to files') -write_cmd.parser.add_option('-p', '--pretend', action='store_true', - help="show all changes but do nothing") -def write_func(lib, opts, args): - write_items(lib, decargs(args), opts.pretend) +write_cmd.parser.add_option( + '-p', '--pretend', action='store_true', + help="show all changes but do nothing" +) +write_cmd.parser.add_option( + '-f', '--force', action='store_true', + help="write tags even if the existing tags match the database" +) write_cmd.func = write_func default_commands.append(write_cmd) # config: Show and edit user configuration. 
-config_cmd = ui.Subcommand('config', - help='show or edit the user configuration') -config_cmd.parser.add_option('-p', '--paths', action='store_true', - help='show files that configuration was loaded from') -config_cmd.parser.add_option('-e', '--edit', action='store_true', - help='edit user configuration with $EDITOR') -config_cmd.parser.add_option('-d', '--defaults', action='store_true', - help='include the default configuration') def config_func(lib, opts, args): # Make sure lazy configuration is loaded config.resolve() @@ -1293,48 +1490,79 @@ def config_func(lib, opts, args): # Open in editor. elif opts.edit: - path = config.user_config_path() - - if 'EDITOR' in os.environ: - editor = os.environ['EDITOR'] - args = [editor, editor, path] - elif platform.system() == 'Darwin': - args = ['open', 'open', '-n', path] - elif platform.system() == 'Windows': - # On windows we can execute arbitrary files. The os will - # take care of starting an appropriate application - args = [path, path] - else: - # Assume Unix - args = ['xdg-open', 'xdg-open', path] - - try: - os.execlp(*args) - except OSError: - raise ui.UserError("Could not edit configuration. Please" - "set the EDITOR environment variable.") + config_edit() # Dump configuration. else: print(config.dump(full=opts.defaults)) + +def config_edit(): + """Open a program to edit the user configuration. + """ + path = config.user_config_path() + + if 'EDITOR' in os.environ: + editor = os.environ['EDITOR'] + try: + editor = shlex.split(editor) + except ValueError: # Malformed shell tokens. + editor = [editor] + args = editor + [path] + args.insert(1, args[0]) + elif platform.system() == 'Darwin': + args = ['open', 'open', '-n', path] + elif platform.system() == 'Windows': + # On windows we can execute arbitrary files. The os will + # take care of starting an appropriate application + args = [path, path] + else: + # Assume Unix + args = ['xdg-open', 'xdg-open', path] + + try: + os.execlp(*args) + except OSError: + raise ui.UserError("Could not edit configuration. Please " + "set the EDITOR environment variable.") + + +config_cmd = ui.Subcommand('config', + help='show or edit the user configuration') +config_cmd.parser.add_option( + '-p', '--paths', action='store_true', + help='show files that configuration was loaded from' +) +config_cmd.parser.add_option( + '-e', '--edit', action='store_true', + help='edit user configuration with $EDITOR' +) +config_cmd.parser.add_option( + '-d', '--defaults', action='store_true', + help='include the default configuration' +) config_cmd.func = config_func default_commands.append(config_cmd) # completion: print completion script -completion_cmd = ui.Subcommand('completion', - help='print shell script that provides command line completion') def print_completion(*args): for line in completion_script(default_commands + plugins.commands()): print(line, end='') - if not (os.path.isfile(u'/etc/bash_completion') or - os.path.isfile(u'/usr/share/bash-completion/bash_completion') or - os.path.isfile(u'/usr/share/local/bash-completion/bash_completion')): + if not any(map(os.path.isfile, BASH_COMPLETION_PATHS)): log.warn(u'Warning: Unable to find the bash-completion package. 
' u'Command line completion might not work.') +BASH_COMPLETION_PATHS = map(syspath, [ + u'/etc/bash_completion', + u'/usr/share/bash-completion/bash_completion', + u'/usr/share/local/bash-completion/bash_completion', + u'/opt/local/share/bash-completion/bash_completion', # SmartOS + u'/usr/local/etc/bash_completion', # Homebrew +]) + + def completion_script(commands): """Yield the full completion shell script as strings. @@ -1355,7 +1583,8 @@ def completion_script(commands): command_names.append(name) for alias in cmd.aliases: - aliases[alias] = name + if re.match(r'^\w+$', alias): + aliases[alias] = name options[name] = {'flags': [], 'opts': []} for opts in cmd.parser._get_all_options()[1:]: @@ -1374,9 +1603,6 @@ def completion_script(commands): 'opts': '-l --library -c --config -d --directory -h --help'.split(' ') } - # Help subcommand - command_names.append('help') - # Add flags common to all commands options['_common'] = { 'flags': ['-h', '--help'] @@ -1397,7 +1623,8 @@ def completion_script(commands): # Fields yield " fields='%s'\n" % ' '.join( - set(library.ITEM_KEYS + library.ALBUM_KEYS)) + set(library.Item._fields.keys() + library.Album._fields.keys()) + ) # Command options for cmd, opts in options.items(): @@ -1410,6 +1637,10 @@ def completion_script(commands): yield '}\n' +completion_cmd = ui.Subcommand( + 'completion', + help='print shell script that provides command line completion' +) completion_cmd.func = print_completion completion_cmd.hide = True default_commands.append(completion_cmd) diff --git a/lib/beets/ui/migrate.py b/lib/beets/ui/migrate.py deleted file mode 100644 index 5766ac98..00000000 --- a/lib/beets/ui/migrate.py +++ /dev/null @@ -1,401 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Conversion from legacy (pre-1.1) configuration to Confit/YAML -configuration. -""" -import os -import ConfigParser -import codecs -import yaml -import logging -import time -import itertools -import re - -import beets -from beets import util -from beets import ui -from beets.util import confit - -CONFIG_PATH_VAR = 'BEETSCONFIG' -DEFAULT_CONFIG_FILENAME_UNIX = '.beetsconfig' -DEFAULT_CONFIG_FILENAME_WINDOWS = 'beetsconfig.ini' -DEFAULT_LIBRARY_FILENAME_UNIX = '.beetsmusic.blb' -DEFAULT_LIBRARY_FILENAME_WINDOWS = 'beetsmusic.blb' -WINDOWS_BASEDIR = os.environ.get('APPDATA') or '~' - -OLD_CONFIG_SUFFIX = '.old' -PLUGIN_NAMES = { - 'rdm': 'random', - 'fuzzy_search': 'fuzzy', -} -AUTO_KEYS = ('automatic', 'autofetch', 'autoembed', 'autoscrub') -IMPORTFEEDS_PREFIX = 'feeds_' -CONFIG_MIGRATED_MESSAGE = u""" -You appear to be upgrading from beets 1.0 (or earlier) to 1.1. Your -configuration file has been migrated automatically to: -{newconfig} -Edit this file to configure beets. You might want to remove your -old-style ".beetsconfig" file now. 
See the documentation for more -details on the new configuration system: -http://beets.readthedocs.org/page/reference/config.html -""".strip() -DB_MIGRATED_MESSAGE = u'Your database file has also been copied to:\n{newdb}' -YAML_COMMENT = '# Automatically migrated from legacy .beetsconfig.\n\n' - -log = logging.getLogger('beets') - -# An itertools recipe. -def grouper(n, iterable): - args = [iter(iterable)] * n - return itertools.izip_longest(*args) - -def _displace(fn): - """Move a file aside using a timestamp suffix so a new file can be - put in its place. - """ - util.move( - fn, - u'{0}.old.{1}'.format(fn, int(time.time())), - True - ) - -def default_paths(): - """Produces the appropriate default config and library database - paths for the current system. On Unix, this is always in ~. On - Windows, tries ~ first and then $APPDATA for the config and library - files (for backwards compatibility). - """ - windows = os.path.__name__ == 'ntpath' - if windows: - windata = os.environ.get('APPDATA') or '~' - - # Shorthand for joining paths. - def exp(*vals): - return os.path.expanduser(os.path.join(*vals)) - - config = exp('~', DEFAULT_CONFIG_FILENAME_UNIX) - if windows and not os.path.exists(config): - config = exp(windata, DEFAULT_CONFIG_FILENAME_WINDOWS) - - libpath = exp('~', DEFAULT_LIBRARY_FILENAME_UNIX) - if windows and not os.path.exists(libpath): - libpath = exp(windata, DEFAULT_LIBRARY_FILENAME_WINDOWS) - - return config, libpath - -def get_config(): - """Using the same logic as beets 1.0, locate and read the - .beetsconfig file. Return a ConfigParser instance or None if no - config is found. - """ - default_config, default_libpath = default_paths() - if CONFIG_PATH_VAR in os.environ: - configpath = os.path.expanduser(os.environ[CONFIG_PATH_VAR]) - else: - configpath = default_config - - config = ConfigParser.SafeConfigParser() - if os.path.exists(util.syspath(configpath)): - with codecs.open(configpath, 'r', encoding='utf-8') as f: - config.readfp(f) - return config, configpath - else: - return None, configpath - -def flatten_config(config): - """Given a ConfigParser, flatten the values into a dict-of-dicts - representation where each section gets its own dictionary of values. - """ - out = confit.OrderedDict() - for section in config.sections(): - sec_dict = out[section] = confit.OrderedDict() - for option in config.options(section): - sec_dict[option] = config.get(section, option, True) - return out - -def transform_value(value): - """Given a string read as the value of a config option, return a - massaged version of that value (possibly with a different type). - """ - # Booleans. - if value.lower() in ('false', 'no', 'off'): - return False - elif value.lower() in ('true', 'yes', 'on'): - return True - - # Integers. - try: - return int(value) - except ValueError: - pass - - # Floats. - try: - return float(value) - except ValueError: - pass - - return value - -def transform_data(data): - """Given a dict-of-dicts representation of legacy config data, tweak - the data into a new form. This new form is suitable for dumping as - YAML. - """ - out = confit.OrderedDict() - - for section, pairs in data.items(): - if section == 'beets': - # The "main" section. In the new config system, these values - # are in the "root": no section at all. - for key, value in pairs.items(): - value = transform_value(value) - - if key.startswith('import_'): - # Importer config is now under an "import:" key. 
- if 'import' not in out: - out['import'] = confit.OrderedDict() - out['import'][key[7:]] = value - - elif key == 'plugins': - # Renamed plugins. - plugins = value.split() - new_plugins = [PLUGIN_NAMES.get(p, p) for p in plugins] - out['plugins'] = ' '.join(new_plugins) - - elif key == 'replace': - # YAMLy representation for character replacements. - replacements = confit.OrderedDict() - for pat, repl in grouper(2, value.split()): - if repl == '': - repl = '' - replacements[pat] = repl - out['replace'] = replacements - - elif key == 'pluginpath': - # Used to be a colon-separated string. Now a list. - out['pluginpath'] = value.split(':') - - else: - out[key] = value - - elif pairs: - # Other sections (plugins, etc). - sec_out = out[section] = confit.OrderedDict() - for key, value in pairs.items(): - - # Standardized "auto" option. - if key in AUTO_KEYS: - key = 'auto' - - # Unnecessary : hack in queries. - if section == 'paths': - key = key.replace('_', ':') - - # Changed option names for importfeeds plugin. - if section == 'importfeeds': - if key.startswith(IMPORTFEEDS_PREFIX): - key = key[len(IMPORTFEEDS_PREFIX):] - - sec_out[key] = transform_value(value) - - return out - -class Dumper(yaml.SafeDumper): - """A PyYAML Dumper that represents OrderedDicts as ordinary mappings - (in order, of course). - """ - # From http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = yaml.MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - if hasattr(mapping, 'items'): - mapping = list(mapping.items()) - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, yaml.ScalarNode) and \ - not node_key.style): - best_style = False - if not (isinstance(node_value, yaml.ScalarNode) and \ - not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node -Dumper.add_representer(confit.OrderedDict, Dumper.represent_dict) - -def migrate_config(replace=False): - """Migrate a legacy beetsconfig file to a new-style config.yaml file - in an appropriate place. If `replace` is enabled, then any existing - config.yaml will be moved aside. Otherwise, the process is aborted - when the file exists. - """ - - # Load legacy configuration data, if any. - config, configpath = get_config() - if not config: - log.debug(u'no config file found at {0}'.format( - util.displayable_path(configpath) - )) - return - - # Get the new configuration file path and possibly move it out of - # the way. - destfn = os.path.join(beets.config.config_dir(), confit.CONFIG_FILENAME) - if os.path.exists(destfn): - if replace: - log.debug(u'moving old config aside: {0}'.format( - util.displayable_path(destfn) - )) - _displace(destfn) - else: - # File exists and we won't replace it. We're done. - return - - log.debug(u'migrating config file {0}'.format( - util.displayable_path(configpath) - )) - - # Convert the configuration to a data structure ready to be dumped - # as the new Confit file. - data = transform_data(flatten_config(config)) - - # Encode result as YAML. 
- yaml_out = yaml.dump( - data, - Dumper=Dumper, - default_flow_style=False, - indent=4, - width=1000, - ) - # A ridiculous little hack to add some whitespace between "sections" - # in the YAML output. I hope this doesn't break any YAML syntax. - yaml_out = re.sub(r'(\n\w+:\n [^-\s])', '\n\\1', yaml_out) - yaml_out = YAML_COMMENT + yaml_out - - # Write the data to the new config destination. - log.debug(u'writing migrated config to {0}'.format( - util.displayable_path(destfn) - )) - with open(destfn, 'w') as f: - f.write(yaml_out) - return destfn - -def migrate_db(replace=False): - """Copy the beets library database file to the new location (e.g., - from ~/.beetsmusic.blb to ~/.config/beets/library.db). - """ - _, srcfn = default_paths() - destfn = beets.config['library'].as_filename() - - if not os.path.exists(srcfn) or srcfn == destfn: - # Old DB does not exist or we're configured to point to the same - # database. Do nothing. - return - - if os.path.exists(destfn): - if replace: - log.debug(u'moving old database aside: {0}'.format( - util.displayable_path(destfn) - )) - _displace(destfn) - else: - return - - log.debug(u'copying database from {0} to {1}'.format( - util.displayable_path(srcfn), util.displayable_path(destfn) - )) - util.copy(srcfn, destfn) - return destfn - -def migrate_state(replace=False): - """Copy the beets runtime state file from the old path (i.e., - ~/.beetsstate) to the new path (i.e., ~/.config/beets/state.pickle). - """ - srcfn = os.path.expanduser(os.path.join('~', '.beetsstate')) - if not os.path.exists(srcfn): - return - - destfn = beets.config['statefile'].as_filename() - if os.path.exists(destfn): - if replace: - _displace(destfn) - else: - return - - log.debug(u'copying state file from {0} to {1}'.format( - util.displayable_path(srcfn), util.displayable_path(destfn) - )) - util.copy(srcfn, destfn) - return destfn - - -# Automatic migration when beets starts. - -def automigrate(): - """Migrate the configuration, database, and state files. If any - migration occurs, print out a notice with some helpful next steps. - """ - config_fn = migrate_config() - db_fn = migrate_db() - migrate_state() - - if config_fn: - ui.print_(ui.colorize('fuchsia', u'MIGRATED CONFIGURATION')) - - ui.print_(CONFIG_MIGRATED_MESSAGE.format( - newconfig=util.displayable_path(config_fn)) - ) - if db_fn: - ui.print_(DB_MIGRATED_MESSAGE.format( - newdb=util.displayable_path(db_fn) - )) - - ui.input_(ui.colorize('fuchsia', u'Press ENTER to continue:')) - ui.print_() - - -# CLI command for explicit migration. - -migrate_cmd = ui.Subcommand('migrate', help='convert legacy config') -def migrate_func(lib, opts, args): - """Explicit command for migrating files. Existing files in each - destination are moved aside. 
- """ - config_fn = migrate_config(replace=True) - if config_fn: - log.info(u'Migrated configuration to: {0}'.format( - util.displayable_path(config_fn) - )) - db_fn = migrate_db(replace=True) - if db_fn: - log.info(u'Migrated library database to: {0}'.format( - util.displayable_path(db_fn) - )) - state_fn = migrate_state(replace=True) - if state_fn: - log.info(u'Migrated state file to: {0}'.format( - util.displayable_path(state_fn) - )) -migrate_cmd.func = migrate_func diff --git a/lib/beets/util/__init__.py b/lib/beets/util/__init__.py index 985c8b6c..38cecd70 100644 --- a/lib/beets/util/__init__.py +++ b/lib/beets/util/__init__.py @@ -23,10 +23,13 @@ import fnmatch from collections import defaultdict import traceback import subprocess +import platform + MAX_FILENAME_LENGTH = 200 WINDOWS_MAGIC_PREFIX = u'\\\\?\\' + class HumanReadableException(Exception): """An Exception that can include a human-readable error message to be logged without a traceback. Can preserve a traceback for @@ -82,6 +85,7 @@ class HumanReadableException(Exception): logger.debug(self.tb) logger.error(u'{0}: {1}'.format(self.error_kind, self.args[0])) + class FilesystemError(HumanReadableException): """An error that occurred while performing a filesystem manipulation via a function in this module. The `paths` field is a sequence of @@ -111,6 +115,7 @@ class FilesystemError(HumanReadableException): return u'{0} {1}'.format(self._reasonstr(), clause) + def normpath(path): """Provide the canonical form of the path suitable for storing in the database. @@ -119,6 +124,7 @@ def normpath(path): path = os.path.normpath(os.path.abspath(os.path.expanduser(path))) return bytestring_path(path) + def ancestry(path): """Return a list consisting of path's parent directory, its grandparent, and so on. For instance: @@ -137,10 +143,12 @@ def ancestry(path): break last_path = path - if path: # don't yield '' + if path: + # don't yield '' out.insert(0, path) return out + def sorted_walk(path, ignore=(), logger=None): """Like `os.walk`, but yields things in case-insensitive sorted, breadth-first order. Directory and file names matching any glob @@ -192,6 +200,7 @@ def sorted_walk(path, ignore=(), logger=None): for res in sorted_walk(cur, ignore, logger): yield res + def mkdirall(path): """Make all the enclosing directories of path (like mkdir -p on the parent). @@ -204,6 +213,7 @@ def mkdirall(path): raise FilesystemError(exc, 'create', (ancestor,), traceback.format_exc()) + def fnmatch_all(names, patterns): """Determine whether all strings in `names` match at least one of the `patterns`, which should be shell glob expressions. @@ -218,6 +228,7 @@ def fnmatch_all(names, patterns): return False return True + def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): """If path is an empty directory, then remove it. Recursively remove path's ancestry up to root (which is never removed) where there are @@ -236,7 +247,7 @@ def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): ancestors = [] elif root in ancestors: # Only remove directories below the root. - ancestors = ancestors[ancestors.index(root)+1:] + ancestors = ancestors[ancestors.index(root) + 1:] else: # Remove nothing. return @@ -258,6 +269,7 @@ def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): else: break + def components(path): """Return a list of the path components in path. For instance: @@ -281,6 +293,7 @@ def components(path): return comps + def _fsencoding(): """Get the system's filesystem encoding. 
On Windows, this is always UTF-8 (not MBCS). @@ -295,6 +308,7 @@ def _fsencoding(): encoding = 'utf8' return encoding + def bytestring_path(path): """Given a path, which is either a str or a unicode, returns a str path (ensuring that we never deal with Unicode pathnames). @@ -315,6 +329,7 @@ def bytestring_path(path): except (UnicodeError, LookupError): return path.encode('utf8') + def displayable_path(path, separator=u'; '): """Attempts to decode a bytestring path to a unicode object for the purpose of displaying it to the user. If the `path` argument is a @@ -333,6 +348,7 @@ def displayable_path(path, separator=u'; '): except (UnicodeError, LookupError): return path.decode('utf8', 'ignore') + def syspath(path, prefix=True): """Convert a path for use by the operating system. In particular, paths on Windows must receive a magic prefix and must be converted @@ -356,16 +372,22 @@ def syspath(path, prefix=True): encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() path = path.decode(encoding, 'replace') - # Add the magic prefix if it isn't already there + # Add the magic prefix if it isn't already there. + # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx if prefix and not path.startswith(WINDOWS_MAGIC_PREFIX): + if path.startswith(u'\\\\'): + # UNC path. Final path should look like \\?\UNC\... + path = u'UNC' + path[1:] path = WINDOWS_MAGIC_PREFIX + path return path + def samefile(p1, p2): """Safer equality for paths.""" return shutil._samefile(syspath(p1), syspath(p2)) + def remove(path, soft=True): """Remove the file. If `soft`, then no error will be raised if the file does not exist. @@ -378,6 +400,7 @@ def remove(path, soft=True): except (OSError, IOError) as exc: raise FilesystemError(exc, 'delete', (path,), traceback.format_exc()) + def copy(path, dest, replace=False): """Copy a plain file. Permissions are not copied. If `dest` already exists, raises a FilesystemError unless `replace` is True. Has no @@ -396,6 +419,7 @@ def copy(path, dest, replace=False): raise FilesystemError(exc, 'copy', (path, dest), traceback.format_exc()) + def move(path, dest, replace=False): """Rename a file. `dest` may not be a directory. If `dest` already exists, raises an OSError unless `replace` is True. Has no effect if @@ -424,6 +448,27 @@ def move(path, dest, replace=False): raise FilesystemError(exc, 'move', (path, dest), traceback.format_exc()) + +def link(path, dest, replace=False): + """Create a symbolic link from path to `dest`. Raises an OSError if + `dest` already exists, unless `replace` is True. Does nothing if + `path` == `dest`.""" + if (samefile(path, dest)): + return + + path = syspath(path) + dest = syspath(dest) + if os.path.exists(dest) and not replace: + raise FilesystemError('file exists', 'rename', (path, dest), + traceback.format_exc()) + try: + os.symlink(path, dest) + except OSError: + raise FilesystemError('Operating system does not support symbolic ' + 'links.', 'link', (path, dest), + traceback.format_exc()) + + def unique_path(path): """Returns a version of ``path`` that does not exist on the filesystem. Specifically, if ``path` itself already exists, then @@ -457,6 +502,8 @@ CHAR_REPLACE = [ (re.compile(ur'\.$'), u'_'), # Trailing dots. (re.compile(ur'\s+$'), u''), # Trailing whitespace. ] + + def sanitize_path(path, replacements=None): """Takes a path (as a Unicode string) and makes sure that it is legal. Returns a new path. 
Only works with fragments; won't work @@ -477,6 +524,7 @@ def sanitize_path(path, replacements=None): comps[i] = comp return os.path.join(*comps) + def truncate_path(path, length=MAX_FILENAME_LENGTH): """Given a bytestring path or a Unicode path fragment, truncate the components to a legal length. In the last component, the extension @@ -493,6 +541,7 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH): return os.path.join(*out) + def str2bool(value): """Returns a boolean reflecting a human-entered string.""" if value.lower() in ('yes', '1', 'true', 't', 'y'): @@ -500,6 +549,7 @@ def str2bool(value): else: return False + def as_string(value): """Convert a value to a Unicode object for matching with a query. None becomes the empty string. Bytestrings are silently decoded. @@ -513,6 +563,7 @@ def as_string(value): else: return unicode(value) + def levenshtein(s1, s2): """A nice DP edit distance implementation from Wikibooks: http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/ @@ -535,6 +586,7 @@ def levenshtein(s1, s2): return previous_row[-1] + def plurality(objs): """Given a sequence of comparable objects, returns the object that is most common in the set and the frequency of that object. The @@ -558,6 +610,7 @@ def plurality(objs): return res, max_freq + def cpu_count(): """Return the number of hardware thread contexts (cores or SMT threads) in the system. @@ -571,7 +624,7 @@ def cpu_count(): num = 0 elif sys.platform == 'darwin': try: - num = int(os.popen('sysctl -n hw.ncpu').read()) + num = int(command_output(['sysctl', '-n', 'hw.ncpu'])) except ValueError: num = 0 else: @@ -584,23 +637,38 @@ def cpu_count(): else: return 1 -def command_output(cmd): - """Wraps the `subprocess` module to invoke a command (given as a - list of arguments starting with the command name) and collect - stdout. The stderr stream is ignored. May raise - `subprocess.CalledProcessError` or an `OSError`. + +def command_output(cmd, shell=False): + """Runs the command and returns its output after it has exited. + + ``cmd`` is a list of arguments starting with the command names. If + ``shell`` is true, ``cmd`` is assumed to be a string and passed to a + shell to execute. + + If the process exits with a non-zero return code + ``subprocess.CalledProcessError`` is raised. May also raise + ``OSError``. This replaces `subprocess.check_output`, which isn't available in Python 2.6 and which can have problems if lots of output is sent to stderr. """ - with open(os.devnull, 'w') as devnull: - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=devnull) - stdout, _ = proc.communicate() + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=platform.system() != 'Windows', + shell=shell + ) + stdout, stderr = proc.communicate() if proc.returncode: - raise subprocess.CalledProcessError(proc.returncode, cmd) + raise subprocess.CalledProcessError( + returncode=proc.returncode, + cmd=' '.join(cmd), + ) return stdout + def max_filename_length(path, limit=MAX_FILENAME_LENGTH): """Attempt to determine the maximum filename length for the filesystem containing `path`. If the value is greater than `limit`, diff --git a/lib/beets/util/artresizer.py b/lib/beets/util/artresizer.py index 6e367a0a..f17fdc5b 100644 --- a/lib/beets/util/artresizer.py +++ b/lib/beets/util/artresizer.py @@ -1,5 +1,5 @@ # This file is part of beets. 
-# Copyright 2013, Fabrice Laporte +# Copyright 2014, Fabrice Laporte # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -18,6 +18,7 @@ public resizing proxy if neither is available. import urllib import subprocess import os +import re from tempfile import NamedTemporaryFile import logging from beets import util @@ -37,7 +38,7 @@ def resize_url(url, maxwidth): maxwidth (preserving aspect ratio). """ return '{0}?{1}'.format(PROXY_URL, urllib.urlencode({ - 'url': url.replace('http://',''), + 'url': url.replace('http://', ''), 'w': str(maxwidth), })) @@ -76,7 +77,7 @@ def pil_resize(maxwidth, path_in, path_out=None): def im_resize(maxwidth, path_in, path_out=None): """Resize using ImageMagick's ``convert`` tool. - tool. Return the output path of resized image. + Return the output path of resized image. """ path_out = path_out or temp_file_for(path_in) log.debug(u'artresizer: ImageMagick resizing {0} to {1}'.format( @@ -132,8 +133,9 @@ class ArtResizer(object): """Create a resizer object for the given method or, if none is specified, with an inferred method. """ - self.method = method or self._guess_method() + self.method = self._check_method(method) log.debug(u"artresizer: method is {0}".format(self.method)) + self.can_compare = self._can_compare() def resize(self, maxwidth, path_in, path_out=None): """Manipulate an image file according to the method, returning a @@ -141,7 +143,7 @@ class ArtResizer(object): temporary file. For WEBPROXY, returns `path_in` unmodified. """ if self.local: - func = BACKEND_FUNCS[self.method] + func = BACKEND_FUNCS[self.method[0]] return func(maxwidth, path_in, path_out) else: return path_in @@ -159,30 +161,51 @@ class ArtResizer(object): @property def local(self): """A boolean indicating whether the resizing method is performed - locally (i.e., PIL or IMAGEMAGICK). + locally (i.e., PIL or ImageMagick). """ - return self.method in BACKEND_FUNCS + return self.method[0] in BACKEND_FUNCS + + def _can_compare(self): + """A boolean indicating whether image comparison is available""" + + return self.method[0] == IMAGEMAGICK and self.method[1] > (6, 8, 7) @staticmethod - def _guess_method(): - """Determine which resizing method to use. Returns PIL, - IMAGEMAGICK, or WEBPROXY depending on available dependencies. + def _check_method(method=None): + """A tuple indicating whether current method is available and its + version. If no method is given, it returns a supported one. """ - # Try importing PIL. - try: - __import__('PIL', fromlist=['Image']) - return PIL - except ImportError: - pass + # Guess available method + if not method: + for m in [IMAGEMAGICK, PIL]: + _, version = ArtResizer._check_method(m) + if version: + return (m, version) + return (WEBPROXY, (0)) - # Try invoking ImageMagick's "convert". - try: - out = util.command_output(['convert', '--version']) - if 'imagemagick' in out.lower(): - # system32/convert.exe may be interfering - return IMAGEMAGICK - except (subprocess.CalledProcessError, OSError): - pass + if method == IMAGEMAGICK: - # Fall back to Web proxy method. - return WEBPROXY + # Try invoking ImageMagick's "convert". 
+ try: + out = util.command_output(['identify', '--version']) + + if 'imagemagick' in out.lower(): + pattern = r".+ (\d+)\.(\d+)\.(\d+).*" + match = re.search(pattern, out) + if match: + return (IMAGEMAGICK, + (int(match.group(1)), + int(match.group(2)), + int(match.group(3)))) + return (IMAGEMAGICK, (0)) + + except (subprocess.CalledProcessError, OSError): + return (IMAGEMAGICK, None) + + if method == PIL: + # Try importing PIL. + try: + __import__('PIL', fromlist=['Image']) + return (PIL, (0)) + except ImportError: + return (PIL, None) diff --git a/lib/beets/util/bluelet.py b/lib/beets/util/bluelet.py index 9d9432f2..a12ec945 100644 --- a/lib/beets/util/bluelet.py +++ b/lib/beets/util/bluelet.py @@ -38,6 +38,7 @@ class Event(object): """ pass + class WaitableEvent(Event): """A waitable event is one encapsulating an action that can be waited for using a select() call. That is, it's an event with an @@ -57,21 +58,25 @@ class WaitableEvent(Event): """ pass + class ValueEvent(Event): """An event that does nothing but return a fixed value.""" def __init__(self, value): self.value = value + class ExceptionEvent(Event): """Raise an exception at the yield point. Used internally.""" def __init__(self, exc_info): self.exc_info = exc_info + class SpawnEvent(Event): """Add a new coroutine thread to the scheduler.""" def __init__(self, coro): self.spawned = coro + class JoinEvent(Event): """Suspend the thread until the specified child thread has completed. @@ -79,11 +84,13 @@ class JoinEvent(Event): def __init__(self, child): self.child = child + class KillEvent(Event): """Unschedule a child thread.""" def __init__(self, child): self.child = child + class DelegationEvent(Event): """Suspend execution of the current thread, start a new thread and, once the child thread finished, return control to the parent @@ -92,6 +99,7 @@ class DelegationEvent(Event): def __init__(self, coro): self.spawned = coro + class ReturnEvent(Event): """Return a value the current thread's delegator at the point of delegation. Ends the current (delegate) thread. @@ -99,6 +107,7 @@ class ReturnEvent(Event): def __init__(self, value): self.value = value + class SleepEvent(WaitableEvent): """Suspend the thread for a given duration. """ @@ -108,6 +117,7 @@ class SleepEvent(WaitableEvent): def time_left(self): return max(self.wakeup_time - time.time(), 0.0) + class ReadEvent(WaitableEvent): """Reads from a file-like object.""" def __init__(self, fd, bufsize): @@ -120,6 +130,7 @@ class ReadEvent(WaitableEvent): def fire(self): return self.fd.read(self.bufsize) + class WriteEvent(WaitableEvent): """Writes to a file-like object.""" def __init__(self, fd, data): @@ -192,15 +203,19 @@ def _event_select(events): return ready_events + class ThreadException(Exception): def __init__(self, coro, exc_info): self.coro = coro self.exc_info = exc_info + def reraise(self): _reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) + SUSPENDED = Event() # Special sentinel placeholder for suspended threads. + class Delegated(Event): """Placeholder indicating that a thread has delegated execution to a different thread. @@ -208,6 +223,7 @@ class Delegated(Event): def __init__(self, child): self.child = child + def run(root_coro): """Schedules a coroutine, running it to completion. This encapsulates the Bluelet scheduler, which the root coroutine can @@ -329,7 +345,7 @@ def run(root_coro): break # Wait and fire. 
- event2coro = dict((v,k) for k,v in threads.items()) + event2coro = dict((v, k) for k, v in threads.items()) for event in _event_select(threads.values()): # Run the IO operation, but catch socket errors. try: @@ -378,6 +394,7 @@ def run(root_coro): class SocketClosedError(Exception): pass + class Listener(object): """A socket wrapper object for listening sockets. """ @@ -407,6 +424,7 @@ class Listener(object): self._closed = True self.sock.close() + class Connection(object): """A socket wrapper object for connected sockets. """ @@ -468,6 +486,7 @@ class Connection(object): yield ReturnEvent(line) break + class AcceptEvent(WaitableEvent): """An event for Listener objects (listening sockets) that suspends execution until the socket gets a connection. @@ -482,6 +501,7 @@ class AcceptEvent(WaitableEvent): sock, addr = self.listener.sock.accept() return Connection(sock, addr) + class ReceiveEvent(WaitableEvent): """An event for Connection objects (connected sockets) for asynchronously reading data. @@ -496,6 +516,7 @@ class ReceiveEvent(WaitableEvent): def fire(self): return self.conn.sock.recv(self.bufsize) + class SendEvent(WaitableEvent): """An event for Connection objects (connected sockets) for asynchronously writing data. @@ -523,6 +544,7 @@ def null(): """ return ValueEvent(None) + def spawn(coro): """Event: add another coroutine to the scheduler. Both the parent and child coroutines run concurrently. @@ -531,6 +553,7 @@ def spawn(coro): raise ValueError('%s is not a coroutine' % str(coro)) return SpawnEvent(coro) + def call(coro): """Event: delegate to another coroutine. The current coroutine is resumed once the sub-coroutine finishes. If the sub-coroutine @@ -540,12 +563,14 @@ def call(coro): raise ValueError('%s is not a coroutine' % str(coro)) return DelegationEvent(coro) + def end(value=None): """Event: ends the coroutine and returns a value to its delegator. """ return ReturnEvent(value) + def read(fd, bufsize=None): """Event: read from a file descriptor asynchronously.""" if bufsize is None: @@ -563,10 +588,12 @@ def read(fd, bufsize=None): else: return ReadEvent(fd, bufsize) + def write(fd, data): """Event: write to a file descriptor asynchronously.""" return WriteEvent(fd, data) + def connect(host, port): """Event: connect to a network address and return a Connection object for communicating on the socket. @@ -575,17 +602,20 @@ def connect(host, port): sock = socket.create_connection(addr) return ValueEvent(Connection(sock, addr)) + def sleep(duration): """Event: suspend the thread for ``duration`` seconds. """ return SleepEvent(duration) + def join(coro): """Suspend the thread until another, previously `spawn`ed thread completes. """ return JoinEvent(coro) + def kill(coro): """Halt the execution of a different `spawn`ed thread. """ diff --git a/lib/beets/util/confit.py b/lib/beets/util/confit.py index cf8b3629..de22e0ad 100644 --- a/lib/beets/util/confit.py +++ b/lib/beets/util/confit.py @@ -21,6 +21,8 @@ import pkgutil import sys import yaml import types +import collections +import re try: from collections import OrderedDict except ImportError: @@ -47,6 +49,7 @@ BASESTRING = str if PY3 else basestring NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) TYPE_TYPES = (type,) if PY3 else (type, types.ClassType) + def iter_first(sequence): """Get the first element from an iterable or raise a ValueError if the iterator generates no values. @@ -67,16 +70,25 @@ class ConfigError(Exception): """Base class for exceptions raised when querying a configuration. 
""" + class NotFoundError(ConfigError): """A requested value could not be found in the configuration trees. """ -class ConfigTypeError(ConfigError, TypeError): + +class ConfigValueError(ConfigError): + """The value in the configuration is illegal.""" + + +class ConfigTypeError(ConfigValueError): """The value in the configuration did not match the expected type. """ -class ConfigValueError(ConfigError, ValueError): - """The value in the configuration is illegal.""" + +class ConfigTemplateError(ConfigError): + """Base class for exceptions raised because of an invalid template. + """ + class ConfigReadError(ConfigError): """A configuration file could not be read.""" @@ -132,6 +144,7 @@ class ConfigSource(dict): else: raise TypeError('source value must be a dict') + class ConfigView(object): """A configuration "view" is a query into a program's configuration data. A view represents a hypothetical location in the configuration @@ -207,6 +220,9 @@ class ConfigView(object): """ self.set({key: value}) + def __contains__(self, key): + return self[key].exists() + def set_args(self, namespace): """Overlay parsed command-line arguments, generated by a library like argparse or optparse, onto this view's value. @@ -310,98 +326,6 @@ class ConfigView(object): # Validation and conversion. - def get(self, typ=None): - """Returns the canonical value for the view, checked against the - passed-in type. If the value is not an instance of the given - type, a ConfigTypeError is raised. May also raise a - NotFoundError. - """ - value, _ = self.first() - - if typ is not None: - if not isinstance(typ, TYPE_TYPES): - raise TypeError('argument to get() must be a type') - - if not isinstance(value, typ): - raise ConfigTypeError( - "{0} must be of type {1}, not {2}".format( - self.name, typ.__name__, type(value).__name__ - ) - ) - - return value - - def as_filename(self): - """Get a string as a normalized as an absolute, tilde-free path. - - Relative paths are relative to the configuration directory (see - the `config_dir` method) if they come from a file. Otherwise, - they are relative to the current working directory. This helps - attain the expected behavior when using command-line options. - """ - path, source = self.first() - if not isinstance(path, BASESTRING): - raise ConfigTypeError('{0} must be a filename, not {1}'.format( - self.name, type(path).__name__ - )) - path = os.path.expanduser(STRING(path)) - - if not os.path.isabs(path) and source.filename: - # From defaults: relative to the app's directory. - path = os.path.join(self.root().config_dir(), path) - - return os.path.abspath(path) - - def as_choice(self, choices): - """Ensure that the value is among a collection of choices and - return it. If `choices` is a dictionary, then return the - corresponding value rather than the value itself (the key). - """ - value = self.get() - - if value not in choices: - raise ConfigValueError( - '{0} must be one of {1}, not {2}'.format( - self.name, repr(list(choices)), repr(value) - ) - ) - - if isinstance(choices, dict): - return choices[value] - else: - return value - - def as_number(self): - """Ensure that a value is of numeric type.""" - value = self.get() - if isinstance(value, NUMERIC_TYPES): - return value - raise ConfigTypeError( - '{0} must be numeric, not {1}'.format( - self.name, type(value).__name__ - ) - ) - - def as_str_seq(self): - """Get the value as a list of strings. The underlying configured - value can be a sequence or a single string. 
In the latter case, - the string is treated as a white-space separated list of words. - """ - value = self.get() - if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') - - if isinstance(value, STRING): - return value.split() - else: - try: - return list(value) - except TypeError: - raise ConfigTypeError( - '{0} must be a whitespace-separated string or ' - 'a list'.format(self.name) - ) - def flatten(self): """Create a hierarchy of OrderedDicts containing the data from this view, recursively reifying all views to get their @@ -415,6 +339,35 @@ class ConfigView(object): od[key] = view.get() return od + def get(self, template=None): + """Retrieve the value for this view according to the template. + + The `template` against which the values are checked can be + anything convertible to a `Template` using `as_template`. This + means you can pass in a default integer or string value, for + example, or a type to just check that something matches the type + you expect. + + May raise a `ConfigValueError` (or its subclass, + `ConfigTypeError`) or a `NotFoundError` when the configuration + doesn't satisfy the template. + """ + return as_template(template).value(self, template) + + # Old validation methods (deprecated). + + def as_filename(self): + return self.get(Filename()) + + def as_choice(self, choices): + return self.get(Choice(choices)) + + def as_number(self): + return self.get(Number()) + + def as_str_seq(self): + return self.get(StrSeq()) + class RootView(ConfigView): """The base of a view hierarchy. This view keeps track of the @@ -518,6 +471,7 @@ def _package_path(name): return os.path.dirname(os.path.abspath(filepath)) + def config_dirs(): """Return a platform-specific list of candidates for user configuration directories on the system. @@ -606,10 +560,12 @@ class Loader(yaml.SafeLoader): plain = super(Loader, self).check_plain() return plain or self.peek() == '%' + Loader.add_constructor('tag:yaml.org,2002:str', Loader._construct_unicode) Loader.add_constructor('tag:yaml.org,2002:map', Loader.construct_yaml_map) Loader.add_constructor('tag:yaml.org,2002:omap', Loader.construct_yaml_map) + def load_yaml(filename): """Read a YAML document from a file. If the file cannot be read or parsed, a ConfigReadError is raised. @@ -679,11 +635,13 @@ class Dumper(yaml.SafeDumper): """ return self.represent_scalar('tag:yaml.org,2002:null', '') + Dumper.add_representer(OrderedDict, Dumper.represent_dict) Dumper.add_representer(bool, Dumper.represent_bool) Dumper.add_representer(type(None), Dumper.represent_none) Dumper.add_representer(list, Dumper.represent_list) + def restore_yaml_comments(data, default_data): """Scan default_data for comments (we include empty lines in our definition of comments) and place them before the same keys in data. @@ -898,3 +856,426 @@ class LazyConfig(Configuration): del self.sources[:] self._lazy_suffix = [] self._lazy_prefix = [] + + +# "Validated" configuration views: experimental! + + +REQUIRED = object() +"""A sentinel indicating that there is no default value and an exception +should be raised when the value is missing. +""" + + +class Template(object): + """A value template for configuration fields. + + The template works like a type and instructs Confit about how to + interpret a deserialized YAML value. This includes type conversions, + providing a default value, and validating for errors. For example, a + filepath type might expand tildes and check that the file exists. 
+ """ + def __init__(self, default=REQUIRED): + """Create a template with a given default value. + + If `default` is the sentinel `REQUIRED` (as it is by default), + then an error will be raised when a value is missing. Otherwise, + missing values will instead return `default`. + """ + self.default = default + + def __call__(self, view): + """Invoking a template on a view gets the view's value according + to the template. + """ + return self.value(view, self) + + def value(self, view, template=None): + """Get the value for a `ConfigView`. + + May raise a `NotFoundError` if the value is missing (and the + template requires it) or a `ConfigValueError` for invalid values. + """ + if view.exists(): + value, _ = view.first() + return self.convert(value, view) + elif self.default is REQUIRED: + # Missing required value. This is an error. + raise NotFoundError("{0} not found".format(view.name)) + else: + # Missing value, but not required. + return self.default + + def convert(self, value, view): + """Convert the YAML-deserialized value to a value of the desired + type. + + Subclasses should override this to provide useful conversions. + May raise a `ConfigValueError` when the configuration is wrong. + """ + # Default implementation does no conversion. + return value + + def fail(self, message, view, type_error=False): + """Raise an exception indicating that a value cannot be + accepted. + + `type_error` indicates whether the error is due to a type + mismatch rather than a malformed value. In this case, a more + specific exception is raised. + """ + exc_class = ConfigTypeError if type_error else ConfigValueError + raise exc_class( + '{0}: {1}'.format(view.name, message) + ) + + def __repr__(self): + return '{0}({1})'.format( + type(self).__name__, + '' if self.default is REQUIRED else repr(self.default), + ) + + +class Integer(Template): + """An integer configuration value template. + """ + def convert(self, value, view): + """Check that the value is an integer. Floats are rounded. + """ + if isinstance(value, int): + return value + elif isinstance(value, float): + return int(value) + else: + self.fail('must be a number', view, True) + + +class Number(Template): + """A numeric type: either an integer or a floating-point number. + """ + def convert(self, value, view): + """Check that the value is an int or a float. + """ + if isinstance(value, NUMERIC_TYPES): + return value + else: + self.fail( + 'must be numeric, not {0}'.format(type(value).__name__), + view, + True + ) + + +class MappingTemplate(Template): + """A template that uses a dictionary to specify other types for the + values for a set of keys and produce a validated `AttrDict`. + """ + def __init__(self, mapping): + """Create a template according to a dict (mapping). The + mapping's values should themselves either be Types or + convertible to Types. + """ + subtemplates = {} + for key, typ in mapping.items(): + subtemplates[key] = as_template(typ) + self.subtemplates = subtemplates + + def value(self, view, template=None): + """Get a dict with the same keys as the template and values + validated according to the value types. + """ + out = AttrDict() + for key, typ in self.subtemplates.items(): + out[key] = typ.value(view[key], self) + return out + + def __repr__(self): + return 'MappingTemplate({0})'.format(repr(self.subtemplates)) + + +class String(Template): + """A string configuration value template. 
+ """ + def __init__(self, default=REQUIRED, pattern=None): + """Create a template with the added optional `pattern` argument, + a regular expression string that the value should match. + """ + super(String, self).__init__(default) + self.pattern = pattern + if pattern: + self.regex = re.compile(pattern) + + def convert(self, value, view): + """Check that the value is a string and matches the pattern. + """ + if isinstance(value, BASESTRING): + if self.pattern and not self.regex.match(value): + self.fail( + "must match the pattern {0}".format(self.pattern), + view + ) + return value + else: + self.fail('must be a string', view, True) + + +class Choice(Template): + """A template that permits values from a sequence of choices. + """ + def __init__(self, choices): + """Create a template that validates any of the values from the + iterable `choices`. + + If `choices` is a map, then the corresponding value is emitted. + Otherwise, the value itself is emitted. + """ + self.choices = choices + + def convert(self, value, view): + """Ensure that the value is among the choices (and remap if the + choices are a mapping). + """ + if value not in self.choices: + self.fail( + 'must be one of {0}, not {1}'.format( + repr(list(self.choices)), repr(value) + ), + view + ) + + if isinstance(self.choices, collections.Mapping): + return self.choices[value] + else: + return value + + def __repr__(self): + return 'Choice({0!r})'.format(self.choices) + + +class StrSeq(Template): + """A template for values that are lists of strings. + + Validates both actual YAML string lists and single strings. Strings + can optionally be split on whitespace. + """ + def __init__(self, split=True): + """Create a new template. + + `split` indicates whether, when the underlying value is a single + string, it should be split on whitespace. Otherwise, the + resulting value is a list containing a single string. + """ + super(StrSeq, self).__init__() + self.split = split + + def convert(self, value, view): + if isinstance(value, bytes): + value = value.decode('utf8', 'ignore') + + if isinstance(value, STRING): + if self.split: + return value.split() + else: + return [value] + + try: + value = list(value) + except TypeError: + self.fail('must be a whitespace-separated string or a list', + view, True) + + def convert(x): + if isinstance(x, unicode): + return x + elif isinstance(x, BASESTRING): + return x.decode('utf8', 'ignore') + else: + self.fail('must be a list of strings', view, True) + return map(convert, value) + + +class Filename(Template): + """A template that validates strings as filenames. + + Filenames are returned as absolute, tilde-free paths. + + Relative paths are relative to the template's `cwd` argument + when it is specified, then the configuration directory (see + the `config_dir` method) if they come from a file. Otherwise, + they are relative to the current working directory. This helps + attain the expected behavior when using command-line options. + """ + def __init__(self, default=REQUIRED, cwd=None, relative_to=None, + in_app_dir=False): + """ `relative_to` is the name of a sibling value that is + being validated at the same time. + + `in_app_dir` indicates whether the path should be resolved + inside the application's config directory (even when the setting + does not come from a file). 
+ """ + super(Filename, self).__init__(default) + self.cwd = cwd + self.relative_to = relative_to + self.in_app_dir = in_app_dir + + def __repr__(self): + args = [] + + if self.default is not REQUIRED: + args.append(repr(self.default)) + + if self.cwd is not None: + args.append('cwd=' + repr(self.cwd)) + + if self.relative_to is not None: + args.append('relative_to=' + repr(self.relative_to)) + + if self.in_app_dir: + args.append('in_app_dir=True') + + return 'Filename({0})'.format(', '.join(args)) + + def resolve_relative_to(self, view, template): + if not isinstance(template, (collections.Mapping, MappingTemplate)): + # disallow config.get(Filename(relative_to='foo')) + raise ConfigTemplateError( + 'relative_to may only be used when getting multiple values.' + ) + + elif self.relative_to == view.key: + raise ConfigTemplateError( + '{0} is relative to itself'.format(view.name) + ) + + elif self.relative_to not in view.parent.keys(): + # self.relative_to is not in the config + self.fail( + ( + 'needs sibling value "{0}" to expand relative path' + ).format(self.relative_to), + view + ) + + old_template = {} + old_template.update(template.subtemplates) + + # save time by skipping MappingTemplate's init loop + next_template = MappingTemplate({}) + next_relative = self.relative_to + + # gather all the needed templates and nothing else + while next_relative is not None: + try: + # pop to avoid infinite loop because of recursive + # relative paths + rel_to_template = old_template.pop(next_relative) + except KeyError: + if next_relative in template.subtemplates: + # we encountered this config key previously + raise ConfigTemplateError(( + '{0} and {1} are recursively relative' + ).format(view.name, self.relative_to)) + else: + raise ConfigTemplateError(( + 'missing template for {0}, needed to expand {1}\'s' + + 'relative path' + ).format(self.relative_to, view.name)) + + next_template.subtemplates[next_relative] = rel_to_template + next_relative = rel_to_template.relative_to + + return view.parent.get(next_template)[self.relative_to] + + def value(self, view, template=None): + path, source = view.first() + if not isinstance(path, BASESTRING): + self.fail( + 'must be a filename, not {0}'.format(type(path).__name__), + view, + True + ) + path = os.path.expanduser(STRING(path)) + + if not os.path.isabs(path): + if self.cwd is not None: + # relative to the template's argument + path = os.path.join(self.cwd, path) + + elif self.relative_to is not None: + path = os.path.join( + self.resolve_relative_to(view, template), + path, + ) + + elif source.filename or self.in_app_dir: + # From defaults: relative to the app's directory. + path = os.path.join(view.root().config_dir(), path) + + return os.path.abspath(path) + + +class TypeTemplate(Template): + """A simple template that checks that a value is an instance of a + desired Python type. + """ + def __init__(self, typ, default=REQUIRED): + """Create a template that checks that the value is an instance + of `typ`. + """ + super(TypeTemplate, self).__init__(default) + self.typ = typ + + def convert(self, value, view): + if not isinstance(value, self.typ): + self.fail( + 'must be a {0}, not {1}'.format( + self.typ.__name__, + type(value).__name__, + ), + view, + True + ) + return value + + +class AttrDict(dict): + """A `dict` subclass that can be accessed via attributes (dot + notation) for convenience. 
+ """ + def __getattr__(self, key): + if key in self: + return self[key] + else: + raise AttributeError(key) + + +def as_template(value): + """Convert a simple "shorthand" Python value to a `Template`. + """ + if isinstance(value, Template): + # If it's already a Template, pass it through. + return value + elif isinstance(value, collections.Mapping): + # Dictionaries work as templates. + return MappingTemplate(value) + elif value is int: + return Integer() + elif isinstance(value, int): + return Integer(value) + elif isinstance(value, type) and issubclass(value, BASESTRING): + return String() + elif isinstance(value, BASESTRING): + return String(value) + elif value is float: + return Number() + elif value is None: + return Template() + elif value is dict: + return TypeTemplate(collections.Mapping) + elif value is list: + return TypeTemplate(collections.Sequence) + elif isinstance(value, type): + return TypeTemplate(value) + else: + raise ValueError('cannot convert to template: {0!r}'.format(value)) diff --git a/lib/beets/util/enumeration.py b/lib/beets/util/enumeration.py index e6ec0766..e8cd0fe1 100644 --- a/lib/beets/util/enumeration.py +++ b/lib/beets/util/enumeration.py @@ -12,167 +12,29 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -"""A metaclass for enumerated types that really are types. +from enum import Enum -You can create enumerations with `enum(values, [name])` and they work -how you would expect them to. - >>> from enumeration import enum - >>> Direction = enum('north east south west', name='Direction') - >>> Direction.west - Direction.west - >>> Direction.west == Direction.west - True - >>> Direction.west == Direction.east - False - >>> isinstance(Direction.west, Direction) - True - >>> Direction[3] - Direction.west - >>> Direction['west'] - Direction.west - >>> Direction.west.name - 'west' - >>> Direction.north < Direction.west - True - -Enumerations are classes; their instances represent the possible values -of the enumeration. Because Python classes must have names, you may -provide a `name` parameter to `enum`; if you don't, a meaningless one -will be chosen for you. -""" -import random - -class Enumeration(type): - """A metaclass whose classes are enumerations. - - The `values` attribute of the class is used to populate the - enumeration. Values may either be a list of enumerated names or a - string containing a space-separated list of names. When the class - is created, it is instantiated for each name value in `values`. - Each such instance is the name of the enumerated item as the sole - argument. - - The `Enumerated` class is a good choice for a superclass. +class OrderedEnum(Enum): """ - - def __init__(cls, name, bases, dic): - super(Enumeration, cls).__init__(name, bases, dic) - - if 'values' not in dic: - # Do nothing if no values are provided (i.e., with - # Enumerated itself). - return - - # May be called with a single string, in which case we split on - # whitespace for convenience. - values = dic['values'] - if isinstance(values, basestring): - values = values.split() - - # Create the Enumerated instances for each value. We have to use - # super's __setattr__ here because we disallow setattr below. 
- super(Enumeration, cls).__setattr__('_items_dict', {}) - super(Enumeration, cls).__setattr__('_items_list', []) - for value in values: - item = cls(value, len(cls._items_list)) - cls._items_dict[value] = item - cls._items_list.append(item) - - def __getattr__(cls, key): - try: - return cls._items_dict[key] - except KeyError: - raise AttributeError("enumeration '" + cls.__name__ + - "' has no item '" + key + "'") - - def __setattr__(cls, key, val): - raise TypeError("enumerations do not support attribute assignment") - - def __getitem__(cls, key): - if isinstance(key, int): - return cls._items_list[key] - else: - return getattr(cls, key) - - def __len__(cls): - return len(cls._items_list) - - def __iter__(cls): - return iter(cls._items_list) - - def __nonzero__(cls): - # Ensures that __len__ doesn't get called before __init__ by - # pydoc. - return True - -class Enumerated(object): - """An item in an enumeration. - - Contains instance methods inherited by enumerated objects. The - metaclass is preset to `Enumeration` for your convenience. - - Instance attributes: - name -- The name of the item. - index -- The index of the item in its enumeration. - - >>> from enumeration import Enumerated - >>> class Garment(Enumerated): - ... values = 'hat glove belt poncho lederhosen suspenders' - ... def wear(self): - ... print('now wearing a ' + self.name) - ... - >>> Garment.poncho.wear() - now wearing a poncho + An Enum subclass that allows comparison of members. """ + def __ge__(self, other): + if self.__class__ is other.__class__: + return self.value >= other.value + return NotImplemented - __metaclass__ = Enumeration + def __gt__(self, other): + if self.__class__ is other.__class__: + return self.value > other.value + return NotImplemented - def __init__(self, name, index): - self.name = name - self.index = index + def __le__(self, other): + if self.__class__ is other.__class__: + return self.value <= other.value + return NotImplemented - def __str__(self): - return type(self).__name__ + '.' + self.name - - def __repr__(self): - return str(self) - - def __cmp__(self, other): - if type(self) is type(other): - # Note that we're assuming that the items are direct - # instances of the same Enumeration (i.e., no fancy - # subclassing), which is probably okay. - return cmp(self.index, other.index) - else: - return NotImplemented - -def enum(*values, **kwargs): - """Shorthand for creating a new Enumeration class. - - Call with enumeration values as a list, a space-delimited string, or - just an argument list. To give the class a name, pass it as the - `name` keyword argument. Otherwise, a name will be chosen for you. - - The following are all equivalent: - - enum('pinkie ring middle index thumb') - enum('pinkie', 'ring', 'middle', 'index', 'thumb') - enum(['pinkie', 'ring', 'middle', 'index', 'thumb']) - """ - - if ('name' not in kwargs) or kwargs['name'] is None: - # Create a probably-unique name. It doesn't really have to be - # unique, but getting distinct names each time helps with - # identification in debugging. - name = 'Enumeration' + hex(random.randint(0,0xfffffff))[2:].upper() - else: - name = kwargs['name'] - - if len(values) == 1: - # If there's only one value, we have a couple of alternate calling - # styles. 
- if isinstance(values[0], basestring) or hasattr(values[0], '__iter__'): - values = values[0] - - return type(name, (Enumerated,), {'values': values}) + def __lt__(self, other): + if self.__class__ is other.__class__: + return self.value < other.value + return NotImplemented diff --git a/lib/beets/util/functemplate.py b/lib/beets/util/functemplate.py index 0fce41e5..03e57c61 100644 --- a/lib/beets/util/functemplate.py +++ b/lib/beets/util/functemplate.py @@ -42,6 +42,7 @@ ESCAPE_CHAR = u'$' VARIABLE_PREFIX = '__var_' FUNCTION_PREFIX = '__func_' + class Environment(object): """Contains the values and functions to be substituted into a template. @@ -57,10 +58,12 @@ def ex_lvalue(name): """A variable load expression.""" return ast.Name(name, ast.Store()) + def ex_rvalue(name): """A variable store expression.""" return ast.Name(name, ast.Load()) + def ex_literal(val): """An int, float, long, bool, string, or None literal with the given value. @@ -75,6 +78,7 @@ def ex_literal(val): return ast.Str(val) raise TypeError('no literal for {0}'.format(type(val))) + def ex_varassign(name, expr): """Assign an expression into a single variable. The expression may either be an `ast.expr` object or a value to be used as a literal. @@ -83,6 +87,7 @@ def ex_varassign(name, expr): expr = ex_literal(expr) return ast.Assign([ex_lvalue(name)], expr) + def ex_call(func, args): """A function-call expression with only positional parameters. The function may be an expression or the name of a function. Each @@ -98,6 +103,7 @@ def ex_call(func, args): return ast.Call(func, args, [], None, None) + def compile_func(arg_names, statements, name='_the_func', debug=False): """Compile a list of statements as the body of a function and return the resulting Python function. If `debug`, then print out the @@ -157,6 +163,7 @@ class Symbol(object): expr = ex_rvalue(VARIABLE_PREFIX + self.ident.encode('utf8')) return [expr], set([self.ident.encode('utf8')]), set() + class Call(object): """A function call in a template.""" def __init__(self, ident, args, original): @@ -214,6 +221,7 @@ class Call(object): ) return [subexpr_call], varnames, funcnames + class Expression(object): """Top-level template construct: contains a list of text blobs, Symbols, and Calls. @@ -259,6 +267,7 @@ class Expression(object): class ParseError(Exception): pass + class Parser(object): """Parses a template expression string. Instantiate the class with the template source and call ``parse_expression``. The ``pos`` field @@ -316,13 +325,13 @@ class Parser(object): next_char = self.string[self.pos + 1] if char == ESCAPE_CHAR and next_char in \ - (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP): + (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP): # An escaped special character ($$, $}, etc.). Note that # ${ is not an escape sequence: this is ambiguous with # the start of a symbol and it's not necessary (just # using { suffices in all cases). text_parts.append(next_char) - self.pos += 2 # Skip the next character. + self.pos += 2 # Skip the next character. continue # Shift all characters collected so far into a single string. @@ -372,7 +381,7 @@ class Parser(object): if next_char == GROUP_OPEN: # A symbol like ${this}. - self.pos += 1 # Skip opening. + self.pos += 1 # Skip opening. closer = self.string.find(GROUP_CLOSE, self.pos) if closer == -1 or closer == self.pos: # No closing brace found or identifier is empty. @@ -431,7 +440,7 @@ class Parser(object): self.parts.append(self.string[start_pos:self.pos]) return - self.pos += 1 # Move past closing brace. 
+ self.pos += 1 # Move past closing brace. self.parts.append(Call(ident, args, self.string[start_pos:self.pos])) def parse_argument_list(self): @@ -472,6 +481,7 @@ class Parser(object): self.pos += len(ident) return ident + def _parse(template): """Parse a top-level template string Expression. Any extraneous text is considered literal text. diff --git a/lib/beets/util/pipeline.py b/lib/beets/util/pipeline.py index c64454ff..d267789c 100644 --- a/lib/beets/util/pipeline.py +++ b/lib/beets/util/pipeline.py @@ -35,13 +35,13 @@ from __future__ import print_function import Queue from threading import Thread, Lock import sys -import types BUBBLE = '__PIPELINE_BUBBLE__' POISON = '__PIPELINE_POISON__' DEFAULT_QUEUE_SIZE = 16 + def _invalidate_queue(q, val=None, sync=True): """Breaks a Queue such that it never blocks, always has size 1, and has no maximum size. get()ing from the queue returns `val`, @@ -50,8 +50,10 @@ def _invalidate_queue(q, val=None, sync=True): """ def _qsize(len=len): return 1 + def _put(item): pass + def _get(): return val @@ -70,6 +72,7 @@ def _invalidate_queue(q, val=None, sync=True): if sync: q.mutex.release() + class CountedQueue(Queue.Queue): """A queue that keeps track of the number of threads that are still feeding into it. The queue is poisoned when all threads are @@ -104,6 +107,7 @@ class CountedQueue(Queue.Queue): # Replacement _get invalidates when no items remain. _old_get = self._get + def _get(): out = _old_get() if not self.queue: @@ -117,18 +121,67 @@ class CountedQueue(Queue.Queue): # No items. Invalidate immediately. _invalidate_queue(self, POISON, False) + class MultiMessage(object): """A message yielded by a pipeline stage encapsulating multiple values to be sent to the next stage. """ def __init__(self, messages): self.messages = messages + + def multiple(messages): """Yield multiple([message, ..]) from a pipeline stage to send multiple values to the next pipeline stage. """ return MultiMessage(messages) + +def stage(func): + """Decorate a function to become a simple stage. + + >>> @stage + ... def add(n, i): + ... return i + n + >>> pipe = Pipeline([ + ... iter([1, 2, 3]), + ... add(2), + ... ]) + >>> list(pipe.pull()) + [3, 4, 5] + """ + + def coro(*args): + task = None + while True: + task = yield task + task = func(*(args + (task,))) + return coro + + +def mutator_stage(func): + """Decorate a function that manipulates items in a coroutine to + become a simple stage. + + >>> @mutator_stage + ... def setkey(key, item): + ... item[key] = True + >>> pipe = Pipeline([ + ... iter([{'x': False}, {'a': False}]), + ... setkey('x'), + ... ]) + >>> list(pipe.pull()) + [{'x': True}, {'a': False, 'x': True}] + """ + + def coro(*args): + task = None + while True: + task = yield task + func(*(args + (task,))) + return coro + + def _allmsgs(obj): """Returns a list of all the messages encapsulated in obj. If obj is a MultiMessage, returns its enclosed messages. If obj is BUBBLE, @@ -141,6 +194,7 @@ def _allmsgs(obj): else: return [obj] + class PipelineThread(Thread): """Abstract base class for pipeline-stage threads.""" def __init__(self, all_threads): @@ -169,6 +223,7 @@ class PipelineThread(Thread): for thread in self.all_threads: thread.abort() + class FirstPipelineThread(PipelineThread): """The thread running the first stage in a parallel pipeline setup. The coroutine should just be a generator. @@ -209,6 +264,7 @@ class FirstPipelineThread(PipelineThread): # Generator finished; shut down the pipeline. 
self.out_queue.release() + class MiddlePipelineThread(PipelineThread): """A thread running any stage in the pipeline except the first or last. @@ -256,6 +312,7 @@ class MiddlePipelineThread(PipelineThread): # Pipeline is shutting down normally. self.out_queue.release() + class LastPipelineThread(PipelineThread): """A thread running the last stage in a pipeline. The coroutine should yield nothing. @@ -291,6 +348,7 @@ class LastPipelineThread(PipelineThread): self.abort_all(sys.exc_info()) return + class Pipeline(object): """Represents a staged pattern of work. Each stage in the pipeline is a coroutine that receives messages from the previous stage and @@ -322,7 +380,8 @@ class Pipeline(object): messages between the stages are stored in queues of the given size. """ - queues = [CountedQueue(queue_size) for i in range(len(self.stages)-1)] + queue_count = len(self.stages) - 1 + queues = [CountedQueue(queue_size) for i in range(queue_count)] threads = [] # Set up first stage. @@ -330,10 +389,10 @@ class Pipeline(object): threads.append(FirstPipelineThread(coro, queues[0], threads)) # Middle stages. - for i in range(1, len(self.stages)-1): + for i in range(1, queue_count): for coro in self.stages[i]: threads.append(MiddlePipelineThread( - coro, queues[i-1], queues[i], threads + coro, queues[i - 1], queues[i], threads )) # Last stage. @@ -408,17 +467,20 @@ if __name__ == '__main__': print('generating %i' % i) time.sleep(1) yield i + def work(): num = yield while True: print('processing %i' % num) time.sleep(2) - num = yield num*2 + num = yield num * 2 + def consume(): while True: num = yield time.sleep(1) print('received %i' % num) + ts_start = time.time() Pipeline([produce(), work(), consume()]).run_sequential() ts_seq = time.time() @@ -437,6 +499,7 @@ if __name__ == '__main__': print('generating %i' % i) time.sleep(1) yield i + def exc_work(): num = yield while True: @@ -445,10 +508,10 @@ if __name__ == '__main__': if num == 3: raise Exception() num = yield num * 2 + def exc_consume(): while True: num = yield - #if num == 4: - # raise Exception() print('received %i' % num) + Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1) diff --git a/lib/beets/vfs.py b/lib/beets/vfs.py index 235f3604..e940e21f 100644 --- a/lib/beets/vfs.py +++ b/lib/beets/vfs.py @@ -20,6 +20,7 @@ from beets import util Node = namedtuple('Node', ['files', 'dirs']) + def _insert(node, path, itemid): """Insert an item into a virtual filesystem node.""" if len(path) == 1: @@ -33,6 +34,7 @@ def _insert(node, path, itemid): node.dirs[dirname] = Node({}, {}) _insert(node.dirs[dirname], rest, itemid) + def libtree(lib): """Generates a filesystem-like directory tree for the files contained in `lib`. 
Filesystem nodes are (files, dirs) named diff --git a/lib/beetsplug/embedart.py b/lib/beetsplug/embedart.py index dfdabf5e..49ed4792 100644 --- a/lib/beetsplug/embedart.py +++ b/lib/beetsplug/embedart.py @@ -16,6 +16,9 @@ import os.path import logging import imghdr +import subprocess +import platform +from tempfile import NamedTemporaryFile from beets.plugins import BeetsPlugin from beets import mediafile @@ -25,6 +28,7 @@ from beets.util import syspath, normpath, displayable_path from beets.util.artresizer import ArtResizer from beets import config + log = logging.getLogger('beets') @@ -36,12 +40,19 @@ class EmbedCoverArtPlugin(BeetsPlugin): self.config.add({ 'maxwidth': 0, 'auto': True, + 'compare_threshold': 0, + 'ifempty': False, }) - if self.config['maxwidth'].get(int) and \ - not ArtResizer.shared.local: + + if self.config['maxwidth'].get(int) and not ArtResizer.shared.local: self.config['maxwidth'] = 0 log.warn(u"embedart: ImageMagick or PIL not found; " u"'maxwidth' option ignored") + if self.config['compare_threshold'].get(int) and not \ + ArtResizer.shared.can_compare: + self.config['compare_threshold'] = 0 + log.warn(u"embedart: ImageMagick 6.8.7 or higher not installed; " + u"'compare_threshold' option ignored") def commands(self): # Embed command. @@ -52,12 +63,15 @@ class EmbedCoverArtPlugin(BeetsPlugin): '-f', '--file', metavar='PATH', help='the image file to embed' ) maxwidth = config['embedart']['maxwidth'].get(int) + compare_threshold = config['embedart']['compare_threshold'].get(int) + ifempty = config['embedart']['ifempty'].get(bool) def embed_func(lib, opts, args): if opts.file: imagepath = normpath(opts.file) for item in lib.items(decargs(args)): - embed_item(item, imagepath, maxwidth) + embed_item(item, imagepath, maxwidth, None, + compare_threshold, ifempty) else: for album in lib.albums(decargs(args)): embed_album(album, maxwidth) @@ -72,7 +86,8 @@ class EmbedCoverArtPlugin(BeetsPlugin): def extract_func(lib, opts, args): outpath = normpath(opts.outpath or 'cover') - extract(lib, outpath, decargs(args)) + item = lib.items(decargs(args)).get() + extract(outpath, item) extract_cmd.func = extract_func # Clear command. @@ -91,23 +106,43 @@ def album_imported(lib, album): """Automatically embed art into imported albums. """ if album.artpath and config['embedart']['auto']: - embed_album(album, config['embedart']['maxwidth'].get(int)) + embed_album(album, config['embedart']['maxwidth'].get(int), True) -def embed_item(item, imagepath, maxwidth=None, itempath=None): +def embed_item(item, imagepath, maxwidth=None, itempath=None, + compare_threshold=0, ifempty=False, as_album=False): """Embed an image into the item's media file. """ + if compare_threshold: + if not check_art_similarity(item, imagepath, compare_threshold): + log.warn(u'Image not similar; skipping.') + return + if ifempty: + art = get_art(item) + if not art: + pass + else: + log.debug(u'embedart: media file contained art already {0}'.format( + displayable_path(imagepath) + )) + return + if maxwidth and not as_album: + imagepath = resize_image(imagepath, maxwidth) + try: + log.debug(u'embedart: embedding {0}'.format( + displayable_path(imagepath) + )) item['images'] = [_mediafile_image(imagepath, maxwidth)] - item.try_write(itempath) except IOError as exc: log.error(u'embedart: could not read image file: {0}'.format(exc)) - finally: - # We don't want to store the image in the database + else: + # We don't want to store the image in the database. 
+ item.try_write(itempath) del item['images'] -def embed_album(album, maxwidth=None): +def embed_album(album, maxwidth=None, quiet=False): """Embed album art into all of the album's items. """ imagepath = album.artpath @@ -115,39 +150,78 @@ def embed_album(album, maxwidth=None): log.info(u'No album art present: {0} - {1}'. format(album.albumartist, album.album)) return - if not os.path.isfile(imagepath): + if not os.path.isfile(syspath(imagepath)): log.error(u'Album art not found at {0}' - .format(imagepath)) + .format(displayable_path(imagepath))) return + if maxwidth: + imagepath = resize_image(imagepath, maxwidth) - log.info(u'Embedding album art into {0.albumartist} - {0.album}.' - .format(album)) + log.log( + logging.DEBUG if quiet else logging.INFO, + u'Embedding album art into {0.albumartist} - {0.album}.'.format(album), + ) for item in album.items(): - embed_item(item, imagepath, maxwidth) + embed_item(item, imagepath, maxwidth, None, + config['embedart']['compare_threshold'].get(int), + config['embedart']['ifempty'].get(bool), as_album=True) + + +def resize_image(imagepath, maxwidth): + """Returns path to an image resized to maxwidth. + """ + log.info(u'Resizing album art to {0} pixels wide' + .format(maxwidth)) + imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath)) + return imagepath + + +def check_art_similarity(item, imagepath, compare_threshold): + """A boolean indicating if an image is similar to embedded item art. + """ + with NamedTemporaryFile(delete=True) as f: + art = extract(f.name, item) + + if art: + # Converting images to grayscale tends to minimize the weight + # of colors in the diff score + cmd = 'convert {0} {1} -colorspace gray MIFF:- | ' \ + 'compare -metric PHASH - null:'.format(syspath(imagepath), + syspath(art)) + + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=platform.system() != 'Windows', + shell=True) + stdout, stderr = proc.communicate() + if proc.returncode: + if proc.returncode != 1: + log.warn(u'embedart: IM phashes compare failed for {0}, \ + {1}'.format(displayable_path(imagepath), + displayable_path(art))) + return + phashDiff = float(stderr) + else: + phashDiff = float(stdout) + + log.info(u'embedart: compare PHASH score is {0}'.format(phashDiff)) + if phashDiff > compare_threshold: + return False + + return True def _mediafile_image(image_path, maxwidth=None): """Return a `mediafile.Image` object for the path. - - If maxwidth is set the image is resized if necessary. """ - if maxwidth: - image_path = ArtResizer.shared.resize(maxwidth, syspath(image_path)) with open(syspath(image_path), 'rb') as f: data = f.read() return mediafile.Image(data, type=mediafile.ImageType.front) -# 'extractart' command. - -def extract(lib, outpath, query): - item = lib.items(query).get() - if not item: - log.error(u'No item matches query.') - return - +def get_art(item): # Extract the art. try: mf = mediafile.MediaFile(syspath(item.path)) @@ -157,7 +231,18 @@ def extract(lib, outpath, query): )) return - art = mf.art + return mf.art + +# 'extractart' command. + + +def extract(outpath, item): + if not item: + log.error(u'No item matches query.') + return + + art = get_art(item) + if not art: log.error(u'No album art present in {0} - {1}.' .format(item.artist, item.title)) @@ -170,10 +255,11 @@ def extract(lib, outpath, query): return outpath += '.' 
+ ext - log.info(u'Extracting album art from: {0.artist} - {0.title}\n' - u'To: {1}'.format(item, displayable_path(outpath))) + log.info(u'Extracting album art from: {0.artist} - {0.title} ' + u'to: {1}'.format(item, displayable_path(outpath))) with open(syspath(outpath), 'wb') as f: f.write(art) + return outpath # 'clearart' command. @@ -190,5 +276,5 @@ def clear(lib, query): displayable_path(item.path), exc )) continue - mf.art = None + del mf.art mf.save() diff --git a/lib/beetsplug/fetchart.py b/lib/beetsplug/fetchart.py index 1474a7b0..b2a4620b 100644 --- a/lib/beetsplug/fetchart.py +++ b/lib/beetsplug/fetchart.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2014, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -22,12 +22,18 @@ from tempfile import NamedTemporaryFile import requests -from beets.plugins import BeetsPlugin -from beets.util.artresizer import ArtResizer +from beets import plugins from beets import importer from beets import ui from beets import util from beets import config +from beets.util.artresizer import ArtResizer + +try: + import itunes + HAVE_ITUNES = True +except ImportError: + HAVE_ITUNES = False IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg'] CONTENT_TYPES = ('image/jpeg',) @@ -73,17 +79,14 @@ CAA_URL = 'http://coverartarchive.org/release/{mbid}/front-500.jpg' CAA_GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front-500.jpg' -def caa_art(release_id): - """Return the Cover Art Archive URL given a MusicBrainz release ID. +def caa_art(album): + """Return the Cover Art Archive and Cover Art Archive release group URLs + using album MusicBrainz release ID and release group ID. """ - return CAA_URL.format(mbid=release_id) - - -def caa_group_art(release_group_id): - """Return the Cover Art Archive release group URL given a MusicBrainz - release group ID. - """ - return CAA_GROUP_URL.format(mbid=release_group_id) + if album.mb_albumid: + yield CAA_URL.format(mbid=album.mb_albumid) + if album.mb_releasegroupid: + yield CAA_GROUP_URL.format(mbid=album.mb_releasegroupid) # Art from Amazon. @@ -92,10 +95,12 @@ AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' AMAZON_INDICES = (1, 2) -def art_for_asin(asin): - """Generate URLs for an Amazon ID (ASIN) string.""" - for index in AMAZON_INDICES: - yield AMAZON_URL % (asin, index) +def art_for_asin(album): + """Generate URLs using Amazon ID (ASIN) string. + """ + if album.asin: + for index in AMAZON_INDICES: + yield AMAZON_URL % (album.asin, index) # AlbumArt.org scraper. @@ -104,11 +109,14 @@ AAO_URL = 'http://www.albumart.org/index_detail.php' AAO_PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"' -def aao_art(asin): - """Return art URL from AlbumArt.org given an ASIN.""" +def aao_art(album): + """Return art URL from AlbumArt.org using album ASIN. + """ + if not album.asin: + return # Get the page from albumart.org. 
try: - resp = requests_session.get(AAO_URL, params={'asin': asin}) + resp = requests_session.get(AAO_URL, params={'asin': album.asin}) log.debug(u'fetchart: scraped art URL: {0}'.format(resp.url)) except requests.RequestException: log.debug(u'fetchart: error scraping art page') @@ -118,7 +126,7 @@ def aao_art(asin): m = re.search(AAO_PAT, resp.text) if m: image_url = m.group(1) - return image_url + yield image_url else: log.debug(u'fetchart: no image found on page') @@ -132,6 +140,8 @@ def google_art(album): """Return art URL from google.org given an album title and interpreter. """ + if not (album.albumartist and album.album): + return search_string = (album.albumartist + ',' + album.album).encode('utf-8') response = requests_session.get(GOOGLE_URL, params={ 'v': '1.0', @@ -145,14 +155,39 @@ def google_art(album): data = results['responseData'] dataInfo = data['results'] for myUrl in dataInfo: - return myUrl['unescapedUrl'] + yield myUrl['unescapedUrl'] except: log.debug(u'fetchart: error scraping art page') return +# Art from the iTunes Store. + +def itunes_art(album): + """Return art URL from iTunes Store given an album title. + """ + search_string = (album.albumartist + ' ' + album.album).encode('utf-8') + try: + # Isolate bugs in the iTunes library while searching. + try: + itunes_album = itunes.search_album(search_string)[0] + except Exception as exc: + log.debug('fetchart: iTunes search failed: {0}'.format(exc)) + return + + if itunes_album.get_artwork()['100']: + small_url = itunes_album.get_artwork()['100'] + big_url = small_url.replace('100x100', '1200x1200') + yield big_url + else: + log.debug(u'fetchart: album has no artwork in iTunes Store') + except IndexError: + log.debug(u'fetchart: album not found in iTunes Store') + + # Art from the filesystem. + def filename_priority(filename, cover_names): """Sort order for image names. @@ -164,7 +199,8 @@ def filename_priority(filename, cover_names): def art_in_path(path, cover_names, cautious): - """Look for album art files in a specified directory.""" + """Look for album art files in a specified directory. + """ if not os.path.isdir(path): return @@ -195,31 +231,27 @@ def art_in_path(path, cover_names, cautious): # Try each source in turn. +SOURCES_ALL = [u'coverart', u'itunes', u'amazon', u'albumart', u'google'] -def _source_urls(album): +ART_FUNCS = { + u'coverart': caa_art, + u'itunes': itunes_art, + u'albumart': aao_art, + u'amazon': art_for_asin, + u'google': google_art, +} + + +def _source_urls(album, sources=SOURCES_ALL): """Generate possible source URLs for an album's art. The URLs are not guaranteed to work so they each need to be attempted in turn. This allows the main `art_for_album` function to abort iteration through this sequence early to avoid the cost of scraping when not necessary. """ - # Cover Art Archive. - if album.mb_albumid: - yield caa_art(album.mb_albumid) - if album.mb_releasegroupid: - yield caa_group_art(album.mb_releasegroupid) - - # Amazon and AlbumArt.org. - if album.asin: - for url in art_for_asin(album.asin): - yield url - url = aao_art(album.asin) - if url: - yield url - - if config['fetchart']['google_search']: - url = google_art(album) - if url: + for s in sources: + urls = ART_FUNCS[s](album) + for url in urls: yield url @@ -245,7 +277,8 @@ def art_for_album(album, paths, maxwidth=None, local_only=False): # Web art sources. 
remote_priority = config['fetchart']['remote_priority'].get(bool) if not local_only and (remote_priority or not out): - for url in _source_urls(album): + for url in _source_urls(album, + config['fetchart']['sources'].as_str_seq()): if maxwidth: url = ArtResizer.shared.proxy_url(maxwidth, url) candidate = _fetch_image(url) @@ -286,7 +319,7 @@ def batch_fetch_art(lib, albums, force, maxwidth=None): message)) -class FetchArtPlugin(BeetsPlugin): +class FetchArtPlugin(plugins.BeetsPlugin): def __init__(self): super(FetchArtPlugin, self).__init__() @@ -297,6 +330,7 @@ class FetchArtPlugin(BeetsPlugin): 'cautious': False, 'google_search': False, 'cover_names': ['cover', 'front', 'art', 'album', 'folder'], + 'sources': SOURCES_ALL, }) # Holds paths to downloaded images between fetching them and @@ -309,6 +343,12 @@ class FetchArtPlugin(BeetsPlugin): self.import_stages = [self.fetch_art] self.register_listener('import_task_files', self.assign_art) + available_sources = list(SOURCES_ALL) + if not HAVE_ITUNES and u'itunes' in available_sources: + available_sources.remove(u'itunes') + self.config['sources'] = plugins.sanitize_choices( + self.config['sources'].as_str_seq(), available_sources) + # Asynchronous; after music is added to the library. def fetch_art(self, session, task): """Find art for the album being imported.""" diff --git a/lib/beetsplug/lyrics.py b/lib/beetsplug/lyrics.py index 019faa4c..a2ebe7c3 100644 --- a/lib/beetsplug/lyrics.py +++ b/lib/beetsplug/lyrics.py @@ -18,25 +18,26 @@ from __future__ import print_function import re import logging -import urllib +import requests import json import unicodedata +import urllib import difflib import itertools +from HTMLParser import HTMLParseError -from beets.plugins import BeetsPlugin -from beets import ui -from beets import config +from beets import plugins +from beets import config, ui # Global logger. log = logging.getLogger('beets') -DIV_RE = re.compile(r'<(/?)div>?') +DIV_RE = re.compile(r'<(/?)div>?', re.I) COMMENT_RE = re.compile(r'', re.S) TAG_RE = re.compile(r'<[^>]*>') -BREAK_RE = re.compile(r'') +BREAK_RE = re.compile(r'\n?\s*]*)*>\s*\n?', re.I) URL_CHARACTERS = { u'\u2018': u"'", u'\u2019': u"'", @@ -60,10 +61,14 @@ def fetch_url(url): is unreachable. """ try: - return urllib.urlopen(url).read() - except IOError as exc: - log.debug(u'failed to fetch: {0} ({1})'.format(url, unicode(exc))) - return None + r = requests.get(url, verify=False) + except requests.RequestException as exc: + log.debug(u'lyrics request failed: {0}'.format(exc)) + return + if r.status_code == requests.codes.ok: + return r.text + else: + log.debug(u'failed to fetch: {0} ({1})'.format(url, r.status_code)) def unescape(text): @@ -79,10 +84,20 @@ def unescape(text): return out -def extract_text(html, starttag): +def extract_text_between(html, start_marker, end_marker): + try: + _, html = html.split(start_marker, 1) + html, _ = html.split(end_marker, 1) + except ValueError: + return u'' + return html + + +def extract_text_in(html, starttag): """Extract the text from a
<div> tag in the HTML starting with ``starttag``. Returns None if parsing fails. """ + # Strip off the leading text before opening tag. try: _, html = html.split(starttag, 1) @@ -101,7 +116,6 @@ else: # Opening tag. if level == 0: parts.append(html[pos:match.start()]) - level += 1 if level == -1: @@ -110,26 +124,7 @@ else: print('no closing tag found!') return - lyrics = ''.join(parts) - return strip_cruft(lyrics) - - -def strip_cruft(lyrics, wscollapse=True): - """Clean up HTML from an extracted lyrics string. For example, <br>
- tags are replaced with newlines. """ - lyrics = COMMENT_RE.sub('', lyrics) - lyrics = unescape(lyrics) - if wscollapse: - lyrics = re.sub(r'\s+', ' ', lyrics) # Whitespace collapse. - lyrics = re.sub(r'<(script).*?</\1>(?s)', '', lyrics) # Strip script tags. - lyrics = BREAK_RE.sub('\n', lyrics) # <br>
newlines. - lyrics = re.sub(r'\n +', '\n', lyrics) - lyrics = re.sub(r' +\n', '\n', lyrics) - lyrics = TAG_RE.sub('', lyrics) # Strip remaining HTML tags. - lyrics = lyrics.replace('\r', '\n') - lyrics = lyrics.strip() - return lyrics + return u''.join(parts) def search_pairs(item): @@ -140,7 +135,7 @@ def search_pairs(item): In addition to the artist and title obtained from the `item` the method tries to strip extra information like paranthesized suffixes - and featured artists from the strings and add them as caniddates. + and featured artists from the strings and add them as candidates. The method also tries to split multiple titles separated with `/`. """ @@ -149,7 +144,7 @@ def search_pairs(item): artists = [artist] # Remove any featuring artists from the artists name - pattern = r"(.*?) (&|\b(and|ft|feat(uring)?\b))" + pattern = r"(.*?) {0}".format(plugins.feat_tokens()) match = re.search(pattern, artist, re.IGNORECASE) if match: artists.append(match.group(1)) @@ -162,8 +157,8 @@ def search_pairs(item): titles.append(match.group(1)) # Remove any featuring artists from the title - pattern = r"(.*?) \b(ft|feat(uring)?)\b" - for title in titles: + pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)) + for title in titles[:]: match = re.search(pattern, title, re.IGNORECASE) if match: titles.append(match.group(1)) @@ -189,6 +184,19 @@ def _encode(s): s = s.encode('utf8', 'ignore') return urllib.quote(s) +# Musixmatch + +MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + + +def fetch_musixmatch(artist, title): + url = MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()), + _lw_encode(title.title())) + html = fetch_url(url) + if not html: + return + lyrics = extract_text_between(html, '"lyrics_body":', '"lyrics_language":') + return lyrics.strip(',"').replace('\\n', '\n') # LyricsWiki. @@ -212,7 +220,7 @@ def fetch_lyricswiki(artist, title): if not html: return - lyrics = extract_text(html, "
") + lyrics = extract_text_in(html, u"
") if lyrics and 'Unfortunately, we are not licensed' not in lyrics: return lyrics @@ -238,8 +246,8 @@ def fetch_lyricscom(artist, title): html = fetch_url(url) if not html: return - - lyrics = extract_text(html, '
<div id="lyric-body-text" class="lyric-body">') + lyrics = extract_text_between(html, '<div id="lyric-body-text" class="lyric-body">', '</div>
') if not lyrics: return for not_found_str in LYRICSCOM_NOT_FOUND: @@ -280,7 +288,6 @@ def is_page_candidate(urlLink, urlTitle, title, artist): artist = slugify(artist.lower()) sitename = re.search(u"//([^/]+)/.*", slugify(urlLink.lower())).group(1) urlTitle = slugify(urlTitle.lower()) - # Check if URL title contains song title (exact match) if urlTitle.find(title) != -1: return True @@ -289,41 +296,11 @@ def is_page_candidate(urlLink, urlTitle, title, artist): tokens = [by + '_' + artist for by in BY_TRANS] + \ [artist, sitename, sitename.replace('www.', '')] + LYRICS_TRANS songTitle = re.sub(u'(%s)' % u'|'.join(tokens), u'', urlTitle) - - typoRatio = .8 + songTitle = songTitle.strip('_|') + typoRatio = .9 return difflib.SequenceMatcher(None, songTitle, title).ratio() >= typoRatio -def insert_line_feeds(text): - """Insert newlines before upper-case characters. - """ - tokensStr = re.split("([a-z][A-Z])", text) - for idx in range(1, len(tokensStr), 2): - ltoken = list(tokensStr[idx]) - tokensStr[idx] = ltoken[0] + '\n' + ltoken[1] - return ''.join(tokensStr) - - -def sanitize_lyrics(text): - """Clean text, returning raw lyrics as output or None if it happens - that input text is actually not lyrics content. Clean (x)html tags - in text, correct layout and syntax... - """ - text = strip_cruft(text, False) - - # Restore \n in input text - if '\n' not in text: - text = insert_line_feeds(text) - - while text.count('\n\n') > text.count('\n') // 4: - # Remove first occurrence of \n for each sequence of \n - text = re.sub(r'\n(\n+)', '\g<1>', text) - - text = re.sub(r'\n\n+', '\n\n', text) # keep at most two \n in a row - - return text - - def remove_credits(text): """Remove first/last line of text if it contains the word 'lyrics' eg 'Lyrics by songsdatabase.com' @@ -342,13 +319,12 @@ def is_lyrics(text, artist=None): """Determine whether the text seems to be valid lyrics. """ if not text: - return - + return False badTriggersOcc = [] nbLines = text.count('\n') if nbLines <= 1: log.debug(u"Ignoring too short lyrics '{0}'".format(text)) - return 0 + return False elif nbLines < 5: badTriggersOcc.append('too_short') else: @@ -356,7 +332,7 @@ def is_lyrics(text, artist=None): # down text = remove_credits(text) - badTriggers = ['lyrics', 'copyright', 'property'] + badTriggers = ['lyrics', 'copyright', 'property', 'links'] if artist: badTriggersOcc += [artist] @@ -366,62 +342,58 @@ def is_lyrics(text, artist=None): if badTriggersOcc: log.debug(u'Bad triggers detected: {0}'.format(badTriggersOcc)) - return len(badTriggersOcc) < 2 -def scrape_lyrics_from_url(url): +def _scrape_strip_cruft(html, plain_text_out=False): + """Clean up HTML + """ + html = unescape(html) + + html = html.replace('\r', '\n') # Normalize EOL. + html = re.sub(r' +', ' ', html) # Whitespaces collapse. + html = BREAK_RE.sub('\n', html) #
<br> eats up surrounding '\n'. + html = re.sub(r'<(script).*?</\1>(?s)', '', html) # Strip script tags. + + if plain_text_out: # Strip remaining HTML tags + html = COMMENT_RE.sub('', html) + html = TAG_RE.sub('', html) + + html = '\n'.join([x.strip() for x in html.strip().split('\n')]) + html = re.sub(r'\n{3,}', r'\n\n', html) + return html + + +def _scrape_merge_paragraphs(html): + html = re.sub(r'</p>\s*<p(\s*[^>]*)>', '\n', html) + return re.sub(r'<div .*>\s*</div>
', '\n', html) + + +def scrape_lyrics_from_html(html): """Scrape lyrics from a URL. If no lyrics can be found, return None instead. """ - from bs4 import BeautifulSoup, Comment - html = fetch_url(url) + from bs4 import SoupStrainer, BeautifulSoup + if not html: return None - soup = BeautifulSoup(html) - - for tag in soup.findAll('br'): - tag.replaceWith('\n') - - # Remove non relevant html parts - [s.extract() for s in soup(['head', 'script'])] - comments = soup.findAll(text=lambda text: isinstance(text, Comment)) - [s.extract() for s in comments] + def is_text_notcode(text): + length = len(text) + return (length > 20 and + text.count(' ') > length / 25 and + (text.find('{') == -1 or text.find(';') == -1)) + html = _scrape_strip_cruft(html) + html = _scrape_merge_paragraphs(html) + # extract all long text blocks that are not code try: - for tag in soup.findAll(True): - tag.name = 'p' # keep tag contents - - except Exception, e: - log.debug(u'Error {0} when replacing containing marker by p marker' - .format(e, exc_info=True)) - - # Make better soup from current soup! The previous unclosed

sections - # are now closed. Use str() rather than prettify() as it's more - # conservative concerning EOL - soup = BeautifulSoup(str(soup)) - - # In case lyrics are nested in no markup but - # Insert the whole body in a

- bodyTag = soup.find('body') - if bodyTag: - pTag = soup.new_tag("p") - bodyTag.parent.insert(0, pTag) - pTag.insert(0, bodyTag) - - tagTokens = [] - - for tag in soup.findAll('p'): - soup2 = BeautifulSoup(str(tag)) - # Extract all text of

section. - tagTokens += soup2.findAll(text=True) - - if tagTokens: - # Lyrics are expected to be the longest paragraph - tagTokens = sorted(tagTokens, key=len, reverse=True) - soup = BeautifulSoup(tagTokens[0]) - return unescape(tagTokens[0].strip("\n\r: ")) + soup = BeautifulSoup(html, "html.parser", + parse_only=SoupStrainer(text=is_text_notcode)) + except HTMLParseError: + return None + soup = sorted(soup.stripped_strings, key=len)[-1] + return soup def fetch_google(artist, title): @@ -443,15 +415,14 @@ def fetch_google(artist, title): if 'items' in data.keys(): for item in data['items']: urlLink = item['link'] - urlTitle = item['title'] + urlTitle = item.get('title', u'') if not is_page_candidate(urlLink, urlTitle, title, artist): continue - lyrics = scrape_lyrics_from_url(urlLink) + html = fetch_url(urlLink) + lyrics = scrape_lyrics_from_html(html) if not lyrics: continue - lyrics = sanitize_lyrics(lyrics) - if is_lyrics(lyrics, artist): log.debug(u'got lyrics from {0}'.format(item['displayLink'])) return lyrics @@ -459,8 +430,16 @@ def fetch_google(artist, title): # Plugin logic. +SOURCES = ['google', 'lyricwiki', 'lyrics.com', 'musixmatch'] +SOURCE_BACKENDS = { + 'google': fetch_google, + 'lyricwiki': fetch_lyricswiki, + 'lyrics.com': fetch_lyricscom, + 'musixmatch': fetch_musixmatch, +} -class LyricsPlugin(BeetsPlugin): + +class LyricsPlugin(plugins.BeetsPlugin): def __init__(self): super(LyricsPlugin, self).__init__() self.import_stages = [self.imported] @@ -469,12 +448,19 @@ class LyricsPlugin(BeetsPlugin): 'google_API_key': None, 'google_engine_ID': u'009217259823014548361:lndtuqkycfu', 'fallback': None, + 'force': False, + 'sources': SOURCES, }) - self.backends = [fetch_lyricswiki, fetch_lyricscom] - - if self.config['google_API_key'].get(): - self.backends.insert(0, fetch_google) + available_sources = list(SOURCES) + if not self.config['google_API_key'].get() and \ + 'google' in SOURCES: + available_sources.remove('google') + self.config['sources'] = plugins.sanitize_choices( + self.config['sources'].as_str_seq(), available_sources) + self.backends = [] + for key in self.config['sources'].as_str_seq(): + self.backends.append(SOURCE_BACKENDS[key]) def commands(self): cmd = ui.Subcommand('lyrics', help='fetch song lyrics') @@ -490,8 +476,10 @@ class LyricsPlugin(BeetsPlugin): # import_write config value. write = config['import']['write'].get(bool) for item in lib.items(ui.decargs(args)): - self.fetch_item_lyrics(lib, logging.INFO, item, write, - opts.force_refetch) + self.fetch_item_lyrics( + lib, logging.INFO, item, write, + opts.force_refetch or self.config['force'], + ) if opts.printlyr and item.lyrics: ui.print_(item.lyrics) @@ -504,7 +492,7 @@ class LyricsPlugin(BeetsPlugin): if self.config['auto']: for item in task.imported_items(): self.fetch_item_lyrics(session.lib, logging.DEBUG, item, - False, False) + False, self.config['force']) def fetch_item_lyrics(self, lib, loglevel, item, write, force): """Fetch and store lyrics for a single item. If ``write``, then the @@ -551,8 +539,6 @@ class LyricsPlugin(BeetsPlugin): for backend in self.backends: lyrics = backend(artist, title) if lyrics: - if isinstance(lyrics, str): - lyrics = lyrics.decode('utf8', 'ignore') log.debug(u'got lyrics from backend: {0}' .format(backend.__name__)) - return lyrics.strip() + return _scrape_strip_cruft(lyrics, True)
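Note (not part of the upstream patch): the new `sources` config option above is resolved into an ordered list of backends in LyricsPlugin.__init__. The sketch below is a simplified, self-contained approximation of that selection logic; the function name resolve_backends and its arguments are made up for illustration, and it only mimics what plugins.sanitize_choices plus the Google API key check do, so treat it as a sketch rather than the plugin's actual API.

    # Illustrative sketch only: mirrors the source-selection idea from the
    # patch (keep the user's order, drop sources that are unknown or, for
    # Google, unusable without an API key). Not beets code.
    SOURCES = ['google', 'lyricwiki', 'lyrics.com', 'musixmatch']

    def resolve_backends(user_sources, google_api_key=None):
        available = list(SOURCES)
        if not google_api_key:
            # The Google backend needs a Custom Search API key; skip it otherwise.
            available.remove('google')
        return [name for name in user_sources if name in available]

    print(resolve_backends(['lyricwiki', 'musixmatch', 'google']))
    # -> ['lyricwiki', 'musixmatch'] when no Google API key is configured

With a key configured, 'google' stays available and is queried in whatever position the user listed it.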