diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index e924e09e..10e5a469 100755 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -936,11 +936,13 @@ def correctMetadata(albumid, release, downloaded_track_list): continue try: - cur_artist, cur_album, candidates, rec = autotag.tag_album(items, - search_artist=helpers.latinToAscii( - release['ArtistName']), - search_album=helpers.latinToAscii( - release['AlbumTitle'])) + cur_artist, cur_album, prop = autotag.tag_album(items, + search_artist=helpers.latinToAscii( + release['ArtistName']), + search_album=helpers.latinToAscii( + release['AlbumTitle'])) + candidates = prop.candidates + rec = prop.recommendation except Exception as e: logger.error('Error getting recommendation: %s. Not writing metadata', e) return False diff --git a/lib/beets/LICENSE b/lib/beets/LICENSE deleted file mode 100644 index cddcf990..00000000 --- a/lib/beets/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License - -Copyright (c) 2010-2014 Adrian Sampson - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/lib/beets/README.rst b/lib/beets/README.rst deleted file mode 100644 index 8c64e244..00000000 --- a/lib/beets/README.rst +++ /dev/null @@ -1,94 +0,0 @@ -.. image:: https://travis-ci.org/sampsyo/beets.svg?branch=master - :target: https://travis-ci.org/sampsyo/beets - -.. image:: http://img.shields.io/coveralls/sampsyo/beets.svg - :target: https://coveralls.io/r/sampsyo/beets - -.. image:: http://img.shields.io/pypi/v/beets.svg - :target: https://pypi.python.org/pypi/beets - -Beets is the media library management system for obsessive-compulsive music -geeks. - -The purpose of beets is to get your music collection right once and for all. -It catalogs your collection, automatically improving its metadata as it goes. -It then provides a bouquet of tools for manipulating and accessing your music. - -Here's an example of beets' brainy tag corrector doing its thing:: - - $ beet import ~/music/ladytron - Tagging: - Ladytron - Witching Hour - (Similarity: 98.4%) - * Last One Standing -> The Last One Standing - * Beauty -> Beauty*2 - * White Light Generation -> Whitelightgenerator - * All the Way -> All the Way... - -Because beets is designed as a library, it can do almost anything you can -imagine for your music collection. Via `plugins`_, beets becomes a panacea: - -- Fetch or calculate all the metadata you could possibly need: `album art`_, - `lyrics`_, `genres`_, `tempos`_, `ReplayGain`_ levels, or `acoustic - fingerprints`_. -- Get metadata from `MusicBrainz`_, `Discogs`_, or `Beatport`_. Or guess - metadata using songs' filenames or their acoustic fingerprints. -- `Transcode audio`_ to any format you like. 
-- Check your library for `duplicate tracks and albums`_ or for `albums that - are missing tracks`_. -- Clean up crufty tags left behind by other, less-awesome tools. -- Embed and extract album art from files' metadata. -- Browse your music library graphically through a Web browser and play it in any - browser that supports `HTML5 Audio`_. -- Analyze music files' metadata from the command line. -- Listen to your library with a music player that speaks the `MPD`_ protocol - and works with a staggering variety of interfaces. - -If beets doesn't do what you want yet, `writing your own plugin`_ is -shockingly simple if you know a little Python. - -.. _plugins: http://beets.readthedocs.org/page/plugins/ -.. _MPD: http://www.musicpd.org/ -.. _MusicBrainz music collection: http://musicbrainz.org/doc/Collections/ -.. _writing your own plugin: - http://beets.readthedocs.org/page/dev/plugins.html -.. _HTML5 Audio: - http://www.w3.org/TR/html-markup/audio.html -.. _albums that are missing tracks: - http://beets.readthedocs.org/page/plugins/missing.html -.. _duplicate tracks and albums: - http://beets.readthedocs.org/page/plugins/duplicates.html -.. _Transcode audio: - http://beets.readthedocs.org/page/plugins/convert.html -.. _Beatport: http://www.beatport.com/ -.. _Discogs: http://www.discogs.com/ -.. _acoustic fingerprints: - http://beets.readthedocs.org/page/plugins/chroma.html -.. _ReplayGain: http://beets.readthedocs.org/page/plugins/replaygain.html -.. _tempos: http://beets.readthedocs.org/page/plugins/echonest.html -.. _genres: http://beets.readthedocs.org/page/plugins/lastgenre.html -.. _album art: http://beets.readthedocs.org/page/plugins/fetchart.html -.. _lyrics: http://beets.readthedocs.org/page/plugins/lyrics.html -.. _MusicBrainz: http://musicbrainz.org/ - -Read More ---------- - -Learn more about beets at `its Web site`_. Follow `@b33ts`_ on Twitter for -news and updates. - -You can install beets by typing ``pip install beets``. 
Then check out the -`Getting Started`_ guide. - -.. _its Web site: http://beets.radbox.org/ -.. _Getting Started: http://beets.readthedocs.org/page/guides/main.html -.. _@b33ts: http://twitter.com/b33ts/ - -Authors -------- - -Beets is by `Adrian Sampson`_ with a supporting cast of thousands. For help, -please contact the `mailing list`_. - -.. _mailing list: https://groups.google.com/forum/#!forum/beets-users -.. _Adrian Sampson: http://homes.cs.washington.edu/~asampson/ diff --git a/lib/beets/__init__.py b/lib/beets/__init__.py old mode 100644 new mode 100755 index d050a028..9291673b --- a/lib/beets/__init__.py +++ b/lib/beets/__init__.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -12,17 +13,33 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -# This particular version has been slightly modified to work with Headphones -# https://github.com/rembo10/headphones - -__version__ = '1.3.10-headphones' -__author__ = 'Adrian Sampson ' +from __future__ import division, absolute_import, print_function import os -import beets.library from beets.util import confit -Library = beets.library.Library +# This particular version has been slightly modified to work with Headphones +# https://github.com/rembo10/headphones +__version__ = u'1.4.4-headphones' +__author__ = u'Adrian Sampson ' -config = confit.LazyConfig(os.path.dirname(__file__), __name__) + +class IncludeLazyConfig(confit.LazyConfig): + """A version of Confit's LazyConfig that also merges in data from + YAML files specified in an `include` setting. 
+ """ + def read(self, user=True, defaults=True): + super(IncludeLazyConfig, self).read(user, defaults) + + try: + for view in self['include']: + filename = view.as_filename() + if os.path.isfile(filename): + self.set_file(filename) + except confit.NotFoundError: + pass + +# headphones +#config = IncludeLazyConfig('beets', __name__) +config = IncludeLazyConfig(os.path.dirname(__file__), __name__) diff --git a/lib/beets/__main__.py b/lib/beets/__main__.py new file mode 100755 index 00000000..8010ca0d --- /dev/null +++ b/lib/beets/__main__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2017, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""The __main__ module lets you run the beets CLI interface by typing +`python -m beets`. +""" + +from __future__ import division, absolute_import, print_function + +import sys +from .ui import main + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/lib/beets/art.py b/lib/beets/art.py new file mode 100755 index 00000000..979a6f72 --- /dev/null +++ b/lib/beets/art.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2016, Adrian Sampson. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""High-level utilities for manipulating image files associated with +music and items' embedded album art. +""" + +from __future__ import division, absolute_import, print_function + +import subprocess +import platform +from tempfile import NamedTemporaryFile +import os + +from beets.util import displayable_path, syspath, bytestring_path +from beets.util.artresizer import ArtResizer +from beets import mediafile + + +def mediafile_image(image_path, maxwidth=None): + """Return a `mediafile.Image` object for the path. + """ + + with open(syspath(image_path), 'rb') as f: + data = f.read() + return mediafile.Image(data, type=mediafile.ImageType.front) + + +def get_art(log, item): + # Extract the art. + try: + mf = mediafile.MediaFile(syspath(item.path)) + except mediafile.UnreadableFileError as exc: + log.warning(u'Could not extract art from {0}: {1}', + displayable_path(item.path), exc) + return + + return mf.art + + +def embed_item(log, item, imagepath, maxwidth=None, itempath=None, + compare_threshold=0, ifempty=False, as_album=False): + """Embed an image into the item's media file. + """ + # Conditions and filters. 
+ if compare_threshold: + if not check_art_similarity(log, item, imagepath, compare_threshold): + log.info(u'Image not similar; skipping.') + return + if ifempty and get_art(log, item): + log.info(u'media file already contained art') + return + if maxwidth and not as_album: + imagepath = resize_image(log, imagepath, maxwidth) + + # Get the `Image` object from the file. + try: + log.debug(u'embedding {0}', displayable_path(imagepath)) + image = mediafile_image(imagepath, maxwidth) + except IOError as exc: + log.warning(u'could not read image file: {0}', exc) + return + + # Make sure the image kind is safe (some formats only support PNG + # and JPEG). + if image.mime_type not in ('image/jpeg', 'image/png'): + log.info('not embedding image of unsupported type: {}', + image.mime_type) + return + + item.try_write(path=itempath, tags={'images': [image]}) + + +def embed_album(log, album, maxwidth=None, quiet=False, + compare_threshold=0, ifempty=False): + """Embed album art into all of the album's items. + """ + imagepath = album.artpath + if not imagepath: + log.info(u'No album art present for {0}', album) + return + if not os.path.isfile(syspath(imagepath)): + log.info(u'Album art not found at {0} for {1}', + displayable_path(imagepath), album) + return + if maxwidth: + imagepath = resize_image(log, imagepath, maxwidth) + + log.info(u'Embedding album art into {0}', album) + + for item in album.items(): + embed_item(log, item, imagepath, maxwidth, None, + compare_threshold, ifempty, as_album=True) + + +def resize_image(log, imagepath, maxwidth): + """Returns path to an image resized to maxwidth. + """ + log.debug(u'Resizing album art to {0} pixels wide', maxwidth) + imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath)) + return imagepath + + +def check_art_similarity(log, item, imagepath, compare_threshold): + """A boolean indicating if an image is similar to embedded item art. 
+ """ + with NamedTemporaryFile(delete=True) as f: + art = extract(log, f.name, item) + + if art: + is_windows = platform.system() == "Windows" + + # Converting images to grayscale tends to minimize the weight + # of colors in the diff score. So we first convert both images + # to grayscale and then pipe them into the `compare` command. + # On Windows, ImageMagick doesn't support the magic \\?\ prefix + # on paths, so we pass `prefix=False` to `syspath`. + convert_cmd = ['convert', syspath(imagepath, prefix=False), + syspath(art, prefix=False), + '-colorspace', 'gray', 'MIFF:-'] + compare_cmd = ['compare', '-metric', 'PHASH', '-', 'null:'] + log.debug(u'comparing images with pipeline {} | {}', + convert_cmd, compare_cmd) + convert_proc = subprocess.Popen( + convert_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=not is_windows, + ) + compare_proc = subprocess.Popen( + compare_cmd, + stdin=convert_proc.stdout, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=not is_windows, + ) + + # Check the convert output. We're not interested in the + # standard output; that gets piped to the next stage. + convert_proc.stdout.close() + convert_stderr = convert_proc.stderr.read() + convert_proc.stderr.close() + convert_proc.wait() + if convert_proc.returncode: + log.debug( + u'ImageMagick convert failed with status {}: {!r}', + convert_proc.returncode, + convert_stderr, + ) + return + + # Check the compare output. 
+ stdout, stderr = compare_proc.communicate() + if compare_proc.returncode: + if compare_proc.returncode != 1: + log.debug(u'ImageMagick compare failed: {0}, {1}', + displayable_path(imagepath), + displayable_path(art)) + return + out_str = stderr + else: + out_str = stdout + + try: + phash_diff = float(out_str) + except ValueError: + log.debug(u'IM output is not a number: {0!r}', out_str) + return + + log.debug(u'ImageMagick compare score: {0}', phash_diff) + return phash_diff <= compare_threshold + + return True + + +def extract(log, outpath, item): + art = get_art(log, item) + outpath = bytestring_path(outpath) + if not art: + log.info(u'No album art present in {0}, skipping.', item) + return + + # Add an extension to the filename. + ext = mediafile.image_extension(art) + if not ext: + log.warning(u'Unknown image type in {0}.', + displayable_path(item.path)) + return + outpath += bytestring_path('.' + ext) + + log.info(u'Extracting album art from: {0} to: {1}', + item, displayable_path(outpath)) + with open(syspath(outpath), 'wb') as f: + f.write(art) + return outpath + + +def extract_first(log, outpath, items): + for item in items: + real_path = extract(log, outpath, item) + if real_path: + return real_path + + +def clear(log, lib, query): + items = lib.items(query) + log.info(u'Clearing album art from {0} items', len(items)) + for item in items: + log.debug(u'Clearing art for {0}', item) + item.try_write(tags={'images': None}) diff --git a/lib/beets/autotag/__init__.py b/lib/beets/autotag/__init__.py old mode 100644 new mode 100755 index 7c517c60..54ce8e0c --- a/lib/beets/autotag/__init__.py +++ b/lib/beets/autotag/__init__.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. 
# # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,13 +15,15 @@ """Facilities for automatically determining files' correct metadata. """ -import logging +from __future__ import division, absolute_import, print_function + +from beets import logging from beets import config # Parts of external interface. from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch # noqa -from .match import tag_item, tag_album # noqa +from .match import tag_item, tag_album, Proposal # noqa from .match import Recommendation # noqa # Global logger. @@ -39,6 +42,16 @@ def apply_item_metadata(item, track_info): item.mb_trackid = track_info.track_id if track_info.artist_id: item.mb_artistid = track_info.artist_id + if track_info.data_source: + item.data_source = track_info.data_source + + if track_info.lyricist is not None: + item.lyricist = track_info.lyricist + if track_info.composer is not None: + item.composer = track_info.composer + if track_info.arranger is not None: + item.arranger = track_info.arranger + # At the moment, the other metadata is left intact (including album # and track number). Perhaps these should be emptied? @@ -47,7 +60,7 @@ def apply_metadata(album_info, mapping): """Set the items' metadata to match an AlbumInfo object using a mapping from Items to TrackInfo objects. """ - for item, track_info in mapping.iteritems(): + for item, track_info in mapping.items(): # Album, artist, track count. if track_info.artist: item.artist = track_info.artist @@ -90,7 +103,12 @@ def apply_metadata(album_info, mapping): item.title = track_info.title if config['per_disc_numbering']: - item.track = track_info.medium_index or track_info.index + # We want to let the track number be zero, but if the medium index + # is not provided we need to fall back to the overall index. 
+ if track_info.medium_index is not None: + item.track = track_info.medium_index + else: + item.track = track_info.index item.tracktotal = track_info.medium_total or len(album_info.tracks) else: item.track = track_info.index @@ -122,7 +140,8 @@ def apply_metadata(album_info, mapping): 'language', 'country', 'albumstatus', - 'albumdisambig'): + 'albumdisambig', + 'data_source',): value = getattr(album_info, field) if value is not None: item[field] = value @@ -132,5 +151,14 @@ def apply_metadata(album_info, mapping): if track_info.media is not None: item.media = track_info.media + if track_info.lyricist is not None: + item.lyricist = track_info.lyricist + if track_info.composer is not None: + item.composer = track_info.composer + if track_info.arranger is not None: + item.arranger = track_info.arranger + + item.track_alt = track_info.track_alt + # Headphones seal of approval - item.comments = 'tagged by headphones/beets' \ No newline at end of file + item.comments = 'tagged by headphones/beets' diff --git a/lib/beets/autotag/hooks.py b/lib/beets/autotag/hooks.py old mode 100644 new mode 100755 index beb3bd91..3c403fcf --- a/lib/beets/autotag/hooks.py +++ b/lib/beets/autotag/hooks.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -13,15 +14,20 @@ # included in all copies or substantial portions of the Software. 
"""Glue between metadata sources and the matching logic.""" -import logging +from __future__ import division, absolute_import, print_function + from collections import namedtuple +from functools import total_ordering import re +from beets import logging from beets import plugins from beets import config +from beets.util import as_string from beets.autotag import mb -from beets.util import levenshtein +from jellyfish import levenshtein_distance from unidecode import unidecode +import six log = logging.getLogger('beets') @@ -101,7 +107,7 @@ class AlbumInfo(object): # Work around a bug in python-musicbrainz-ngs that causes some # strings to be bytes rather than Unicode. # https://github.com/alastair/python-musicbrainz-ngs/issues/85 - def decode(self, codec='utf8'): + def decode(self, codec='utf-8'): """Ensure that all string attributes on this object, and the constituent `TrackInfo` objects, are decoded to Unicode. """ @@ -109,7 +115,7 @@ class AlbumInfo(object): 'catalognum', 'script', 'language', 'country', 'albumstatus', 'albumdisambig', 'artist_credit', 'media']: value = getattr(self, fld) - if isinstance(value, str): + if isinstance(value, bytes): setattr(self, fld, value.decode(codec, 'ignore')) if self.tracks: @@ -134,6 +140,12 @@ class TrackInfo(object): - ``artist_sort``: name of the track artist for sorting - ``disctitle``: name of the individual medium (subtitle) - ``artist_credit``: Recording-specific artist name + - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) + - ``data_url``: The data source release URL. + - ``lyricist``: individual track lyricist name + - ``composer``: individual track composer name + - ``arranger`: individual track arranger name + - ``track_alt``: alternative track number (tape, vinyl, etc.) Only ``title`` and ``track_id`` are required. The rest of the fields may be None. 
The indices ``index``, ``medium``, and ``medium_index`` @@ -143,7 +155,8 @@ class TrackInfo(object): length=None, index=None, medium=None, medium_index=None, medium_total=None, artist_sort=None, disctitle=None, artist_credit=None, data_source=None, data_url=None, - media=None): + media=None, lyricist=None, composer=None, arranger=None, + track_alt=None): self.title = title self.track_id = track_id self.artist = artist @@ -159,16 +172,20 @@ class TrackInfo(object): self.artist_credit = artist_credit self.data_source = data_source self.data_url = data_url + self.lyricist = lyricist + self.composer = composer + self.arranger = arranger + self.track_alt = track_alt # As above, work around a bug in python-musicbrainz-ngs. - def decode(self, codec='utf8'): + def decode(self, codec='utf-8'): """Ensure that all string attributes on this object are decoded to Unicode. """ for fld in ['title', 'artist', 'medium', 'artist_sort', 'disctitle', 'artist_credit', 'media']: value = getattr(self, fld) - if isinstance(value, str): + if isinstance(value, bytes): setattr(self, fld, value.decode(codec, 'ignore')) @@ -198,13 +215,15 @@ def _string_dist_basic(str1, str2): transliteration/lowering to ASCII characters. Normalized by string length. """ - str1 = unidecode(str1) - str2 = unidecode(str2) + assert isinstance(str1, six.text_type) + assert isinstance(str2, six.text_type) + str1 = as_string(unidecode(str1)) + str2 = as_string(unidecode(str2)) str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) if not str1 and not str2: return 0.0 - return levenshtein(str1, str2) / float(max(len(str1), len(str2))) + return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2))) def string_dist(str1, str2): @@ -281,6 +300,8 @@ class LazyClassProperty(object): return self.value +@total_ordering +@six.python_2_unicode_compatible class Distance(object): """Keeps track of multiple distance penalties. 
Provides a single weighted distance for all penalties as well as a weighted distance @@ -290,7 +311,7 @@ class Distance(object): self._penalties = {} @LazyClassProperty - def _weights(cls): + def _weights(cls): # noqa """A dictionary from keys to floating-point weights. """ weights_view = config['match']['distance_weights'] @@ -316,7 +337,7 @@ class Distance(object): """Return the maximum distance penalty (normalization factor). """ dist_max = 0.0 - for key, penalty in self._penalties.iteritems(): + for key, penalty in self._penalties.items(): dist_max += len(penalty) * self._weights[key] return dist_max @@ -325,7 +346,7 @@ class Distance(object): """Return the raw (denormalized) distance. """ dist_raw = 0.0 - for key, penalty in self._penalties.iteritems(): + for key, penalty in self._penalties.items(): dist_raw += sum(penalty) * self._weights[key] return dist_raw @@ -342,12 +363,21 @@ class Distance(object): # Convert distance into a negative float we can sort items in # ascending order (for keys, when the penalty is equal) and # still get the items with the biggest distance first. - return sorted(list_, key=lambda (key, dist): (0 - dist, key)) + return sorted( + list_, + key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0]) + ) + + def __hash__(self): + return id(self) + + def __eq__(self, other): + return self.distance == other # Behave like a float. - def __cmp__(self, other): - return cmp(self.distance, other) + def __lt__(self, other): + return self.distance < other def __float__(self): return self.distance @@ -358,6 +388,9 @@ class Distance(object): def __rsub__(self, other): return other - self.distance + def __str__(self): + return "{0:.2f}".format(self.distance) + # Behave like a dict. 
def __getitem__(self, key): @@ -383,9 +416,9 @@ class Distance(object): """ if not isinstance(dist, Distance): raise ValueError( - '`dist` must be a Distance object, not {0}'.format(type(dist)) + u'`dist` must be a Distance object, not {0}'.format(type(dist)) ) - for key, penalties in dist._penalties.iteritems(): + for key, penalties in dist._penalties.items(): self._penalties.setdefault(key, []).extend(penalties) # Adding components. @@ -407,7 +440,7 @@ class Distance(object): """ if not 0.0 <= dist <= 1.0: raise ValueError( - '`dist` must be between 0.0 and 1.0, not {0}'.format(dist) + u'`dist` must be between 0.0 and 1.0, not {0}'.format(dist) ) self._penalties.setdefault(key, []).append(dist) @@ -516,20 +549,29 @@ def track_for_mbid(recording_id): exc.log(log) +@plugins.notify_info_yielded(u'albuminfo_received') def albums_for_id(album_id): """Get a list of albums for an ID.""" - candidates = [album_for_mbid(album_id)] - candidates.extend(plugins.album_for_id(album_id)) - return filter(None, candidates) + a = album_for_mbid(album_id) + if a: + yield a + for a in plugins.album_for_id(album_id): + if a: + yield a +@plugins.notify_info_yielded(u'trackinfo_received') def tracks_for_id(track_id): """Get a list of tracks for an ID.""" - candidates = [track_for_mbid(track_id)] - candidates.extend(plugins.track_for_id(track_id)) - return filter(None, candidates) + t = track_for_mbid(track_id) + if t: + yield t + for t in plugins.track_for_id(track_id): + if t: + yield t +@plugins.notify_info_yielded(u'albuminfo_received') def album_candidates(items, artist, album, va_likely): """Search for album matches. ``items`` is a list of Item objects that make up the album. ``artist`` and ``album`` are the respective @@ -537,43 +579,42 @@ def album_candidates(items, artist, album, va_likely): entered by the user. ``va_likely`` is a boolean indicating whether the album is likely to be a "various artists" release. 
""" - out = [] - # Base candidates if we have album and artist to match. if artist and album: try: - out.extend(mb.match_album(artist, album, len(items))) + for candidate in mb.match_album(artist, album, len(items)): + yield candidate except mb.MusicBrainzAPIError as exc: exc.log(log) # Also add VA matches from MusicBrainz where appropriate. if va_likely and album: try: - out.extend(mb.match_album(None, album, len(items))) + for candidate in mb.match_album(None, album, len(items)): + yield candidate except mb.MusicBrainzAPIError as exc: exc.log(log) # Candidates from plugins. - out.extend(plugins.candidates(items, artist, album, va_likely)) - - return out + for candidate in plugins.candidates(items, artist, album, va_likely): + yield candidate +@plugins.notify_info_yielded(u'trackinfo_received') def item_candidates(item, artist, title): """Search for item matches. ``item`` is the Item to be matched. ``artist`` and ``title`` are strings and either reflect the item or are specified by the user. """ - out = [] # MusicBrainz candidates. if artist and title: try: - out.extend(mb.match_track(artist, title)) + for candidate in mb.match_track(artist, title): + yield candidate except mb.MusicBrainzAPIError as exc: exc.log(log) # Plugin candidates. - out.extend(plugins.item_candidates(item, artist, title)) - - return out + for candidate in plugins.item_candidates(item, artist, title): + yield candidate diff --git a/lib/beets/autotag/match.py b/lib/beets/autotag/match.py old mode 100644 new mode 100755 index 2d1f2007..71b62adb --- a/lib/beets/autotag/match.py +++ b/lib/beets/autotag/match.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -15,13 +16,15 @@ """Matches existing metadata with canonical information to identify releases and tracks. 
""" -from __future__ import division + +from __future__ import division, absolute_import, print_function import datetime -import logging import re from munkres import Munkres +from collections import namedtuple +from beets import logging from beets import plugins from beets import config from beets.util import plurality @@ -50,6 +53,13 @@ class Recommendation(OrderedEnum): strong = 3 +# A structure for holding a set of possible matches to choose between. This +# consists of a list of possible candidates (i.e., AlbumInfo or TrackInfo +# objects) and a recommendation value. + +Proposal = namedtuple('Proposal', ('candidates', 'recommendation')) + + # Primary matching functionality. def current_metadata(items): @@ -93,7 +103,9 @@ def assign_items(items, tracks): costs.append(row) # Find a minimum-cost bipartite matching. + log.debug('Computing track assignment...') matching = Munkres().compute(costs) + log.debug('...done.') # Produce the output matching. mapping = dict((items[i], tracks[j]) for (i, j) in matching) @@ -235,7 +247,7 @@ def distance(items, album_info, mapping): # Tracks. dist.tracks = {} - for item, track in mapping.iteritems(): + for item, track in mapping.items(): dist.tracks[track] = track_distance(item, track, album_info.va) dist.add('tracks', dist.tracks[track].distance) @@ -258,19 +270,23 @@ def match_by_id(items): AlbumInfo object for the corresponding album. Otherwise, returns None. """ - # Is there a consensus on the MB album ID? - albumids = [item.mb_albumid for item in items if item.mb_albumid] - if not albumids: - log.debug(u'No album IDs found.') + albumids = (item.mb_albumid for item in items if item.mb_albumid) + + # Did any of the items have an MB album ID? + try: + first = next(albumids) + except StopIteration: + log.debug(u'No album ID found.') return None + # Is there a consensus on the MB album ID? 
+ for other in albumids: + if other != first: + log.debug(u'No album ID consensus.') + return None # If all album IDs are equal, look up the album. - if bool(reduce(lambda x, y: x if x == y else (), albumids)): - albumid = albumids[0] - log.debug(u'Searching for discovered album ID: {0}'.format(albumid)) - return hooks.album_for_mbid(albumid) - else: - log.debug(u'No album ID consensus.') + log.debug(u'Searching for discovered album ID: {0}', first) + return hooks.album_for_mbid(first) def _recommendation(results): @@ -309,10 +325,10 @@ def _recommendation(results): keys = set(min_dist.keys()) if isinstance(results[0], hooks.AlbumMatch): for track_dist in min_dist.tracks.values(): - keys.update(track_dist.keys()) + keys.update(list(track_dist.keys())) max_rec_view = config['match']['max_rec'] for key in keys: - if key in max_rec_view.keys(): + if key in list(max_rec_view.keys()): max_rec = max_rec_view[key].as_choice({ 'strong': Recommendation.strong, 'medium': Recommendation.medium, @@ -324,17 +340,23 @@ def _recommendation(results): return rec +def _sort_candidates(candidates): + """Sort candidates by distance.""" + return sorted(candidates, key=lambda match: match.distance) + + def _add_candidate(items, results, info): """Given a candidate AlbumInfo object, attempt to add the candidate to the output dictionary of AlbumMatch objects. This involves checking the track count, ordering the items, checking for duplicates, and calculating the distance. """ - log.debug(u'Candidate: {0} - {1}'.format(info.artist, info.album)) + log.debug(u'Candidate: {0} - {1} ({2})', + info.artist, info.album, info.album_id) # Discard albums with zero tracks. if not info.tracks: - log.debug('No tracks.') + log.debug(u'No tracks.') return # Don't duplicate. @@ -345,7 +367,7 @@ def _add_candidate(items, results, info): # Discard matches without required tags. for req_tag in config['match']['required'].as_str_seq(): if getattr(info, req_tag) is None: - log.debug(u'Ignored. 
Missing required tag: {0}'.format(req_tag)) + log.debug(u'Ignored. Missing required tag: {0}', req_tag) return # Find mapping between the items and the track info. @@ -355,48 +377,52 @@ def _add_candidate(items, results, info): dist = distance(items, info, mapping) # Skip matches with ignored penalties. - penalties = [key for _, key in dist] + penalties = [key for key, _ in dist] for penalty in config['match']['ignored'].as_str_seq(): if penalty in penalties: - log.debug(u'Ignored. Penalty: {0}'.format(penalty)) + log.debug(u'Ignored. Penalty: {0}', penalty) return - log.debug(u'Success. Distance: {0}'.format(dist)) + log.debug(u'Success. Distance: {0}', dist) results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, extra_items, extra_tracks) def tag_album(items, search_artist=None, search_album=None, - search_id=None): - """Return a tuple of a artist name, an album name, a list of - `AlbumMatch` candidates from the metadata backend, and a - `Recommendation`. + search_ids=[]): + """Return a tuple of the current artist name, the current album + name, and a `Proposal` containing `AlbumMatch` candidates. The artist and album are the most common values of these fields among `items`. The `AlbumMatch` objects are generated by searching the metadata backends. By default, the metadata of the items is used for the - search. This can be customized by setting the parameters. The - `mapping` field of the album has the matched `items` as keys. + search. This can be customized by setting the parameters. + `search_ids` is a list of metadata backend IDs: if specified, + it will restrict the candidates to those IDs, ignoring + `search_artist` and `search album`. The `mapping` field of the + album has the matched `items` as keys. - The recommendation is calculated from the match qualitiy of the + The recommendation is calculated from the match quality of the candidates. """ # Get current metadata. 
likelies, consensus = current_metadata(items) cur_artist = likelies['artist'] cur_album = likelies['album'] - log.debug(u'Tagging {0} - {1}'.format(cur_artist, cur_album)) + log.debug(u'Tagging {0} - {1}', cur_artist, cur_album) # The output result (distance, AlbumInfo) tuples (keyed by MB album # ID). candidates = {} # Search by explicit ID. - if search_id is not None: - log.debug(u'Searching for album ID: {0}'.format(search_id)) - search_cands = hooks.albums_for_id(search_id) + if search_ids: + for search_id in search_ids: + log.debug(u'Searching for album ID: {0}', search_id) + for id_candidate in hooks.albums_for_id(search_id): + _add_candidate(items, candidates, id_candidate) # Use existing metadata or text search. else: @@ -404,81 +430,84 @@ def tag_album(items, search_artist=None, search_album=None, id_info = match_by_id(items) if id_info: _add_candidate(items, candidates, id_info) - rec = _recommendation(candidates.values()) - log.debug(u'Album ID match recommendation is {0}'.format(str(rec))) + rec = _recommendation(list(candidates.values())) + log.debug(u'Album ID match recommendation is {0}', rec) if candidates and not config['import']['timid']: # If we have a very good MBID match, return immediately. # Otherwise, this match will compete against metadata-based # matches. if rec == Recommendation.strong: log.debug(u'ID match.') - return cur_artist, cur_album, candidates.values(), rec + return cur_artist, cur_album, \ + Proposal(list(candidates.values()), rec) # Search terms. if not (search_artist and search_album): # No explicit search terms -- use current metadata. search_artist, search_album = cur_artist, cur_album - log.debug(u'Search terms: {0} - {1}'.format(search_artist, - search_album)) + log.debug(u'Search terms: {0} - {1}', search_artist, search_album) # Is this album likely to be a "various artist" release? 
va_likely = ((not consensus['artist']) or (search_artist.lower() in VA_ARTISTS) or any(item.comp for item in items)) - log.debug(u'Album might be VA: {0}'.format(str(va_likely))) + log.debug(u'Album might be VA: {0}', va_likely) # Get the results from the data sources. - search_cands = hooks.album_candidates(items, search_artist, - search_album, va_likely) - - log.debug(u'Evaluating {0} candidates.'.format(len(search_cands))) - for info in search_cands: - _add_candidate(items, candidates, info) + for matched_candidate in hooks.album_candidates(items, + search_artist, + search_album, + va_likely): + _add_candidate(items, candidates, matched_candidate) + log.debug(u'Evaluating {0} candidates.', len(candidates)) # Sort and get the recommendation. - candidates = sorted(candidates.itervalues()) + candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) - return cur_artist, cur_album, candidates, rec + return cur_artist, cur_album, Proposal(candidates, rec) def tag_item(item, search_artist=None, search_title=None, - search_id=None): - """Attempts to find metadata for a single track. Returns a - `(candidates, recommendation)` pair where `candidates` is a list of - TrackMatch objects. `search_artist` and `search_title` may be used + search_ids=[]): + """Find metadata for a single track. Return a `Proposal` consisting + of `TrackMatch` objects. + + `search_artist` and `search_title` may be used to override the current metadata for the purposes of the MusicBrainz - title; likewise `search_id`. + title. `search_ids` may be used for restricting the search to a list + of metadata backend IDs. """ # Holds candidates found so far: keys are MBIDs; values are # (distance, TrackInfo) pairs. candidates = {} # First, try matching by MusicBrainz ID. 
- trackid = search_id or item.mb_trackid - if trackid: - log.debug(u'Searching for track ID: {0}'.format(trackid)) - for track_info in hooks.tracks_for_id(trackid): - dist = track_distance(item, track_info, incl_artist=True) - candidates[track_info.track_id] = \ - hooks.TrackMatch(dist, track_info) - # If this is a good match, then don't keep searching. - rec = _recommendation(candidates.values()) - if rec == Recommendation.strong and not config['import']['timid']: - log.debug(u'Track ID match.') - return candidates.values(), rec + trackids = search_ids or [t for t in [item.mb_trackid] if t] + if trackids: + for trackid in trackids: + log.debug(u'Searching for track ID: {0}', trackid) + for track_info in hooks.tracks_for_id(trackid): + dist = track_distance(item, track_info, incl_artist=True) + candidates[track_info.track_id] = \ + hooks.TrackMatch(dist, track_info) + # If this is a good match, then don't keep searching. + rec = _recommendation(_sort_candidates(candidates.values())) + if rec == Recommendation.strong and \ + not config['import']['timid']: + log.debug(u'Track ID match.') + return Proposal(_sort_candidates(candidates.values()), rec) # If we're searching by ID, don't proceed. - if search_id is not None: + if search_ids: if candidates: - return candidates.values(), rec + return Proposal(_sort_candidates(candidates.values()), rec) else: - return [], Recommendation.none + return Proposal([], Recommendation.none) # Search terms. if not (search_artist and search_title): search_artist, search_title = item.artist, item.title - log.debug(u'Item search terms: {0} - {1}'.format(search_artist, - search_title)) + log.debug(u'Item search terms: {0} - {1}', search_artist, search_title) # Get and evaluate candidate metadata. 
for track_info in hooks.item_candidates(item, search_artist, search_title): @@ -486,7 +515,7 @@ def tag_item(item, search_artist=None, search_title=None, candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info) # Sort by distance and return with recommendation. - log.debug(u'Found {0} candidates.'.format(len(candidates))) - candidates = sorted(candidates.itervalues()) + log.debug(u'Found {0} candidates.', len(candidates)) + candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) - return candidates, rec + return Proposal(candidates, rec) diff --git a/lib/beets/autotag/mb.py b/lib/beets/autotag/mb.py old mode 100644 new mode 100755 index d063f627..a6133adb --- a/lib/beets/autotag/mb.py +++ b/lib/beets/autotag/mb.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,23 +15,29 @@ """Searches for albums in the MusicBrainz database. 
""" -import logging +from __future__ import division, absolute_import, print_function + import musicbrainzngs import re import traceback -from urlparse import urljoin +from six.moves.urllib.parse import urljoin +from beets import logging import beets.autotag.hooks import beets from beets import util from beets import config +import six -SEARCH_LIMIT = 5 VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' -BASE_URL = 'http://musicbrainz.org/' + +if util.SNI_SUPPORTED: + BASE_URL = 'https://musicbrainz.org/' +else: + BASE_URL = 'http://musicbrainz.org/' musicbrainzngs.set_useragent('beets', beets.__version__, - 'http://beets.radbox.org/') + 'http://beets.io/') class MusicBrainzAPIError(util.HumanReadableException): @@ -39,6 +46,8 @@ class MusicBrainzAPIError(util.HumanReadableException): """ def __init__(self, reason, verb, query, tb=None): self.query = query + if isinstance(reason, musicbrainzngs.WebServiceError): + reason = u'MusicBrainz not reachable' super(MusicBrainzAPIError, self).__init__(reason, verb, tb) def get_message(self): @@ -49,8 +58,12 @@ class MusicBrainzAPIError(util.HumanReadableException): log = logging.getLogger('beets') RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', - 'labels', 'artist-credits', 'aliases'] + 'labels', 'artist-credits', 'aliases', + 'recording-level-rels', 'work-rels', + 'work-level-rels', 'artist-rels'] TRACK_INCLUDES = ['artists', 'aliases'] +if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: + TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] def track_url(trackid): @@ -65,7 +78,8 @@ def configure(): """Set up the python-musicbrainz-ngs module according to settings from the beets configuration. This should be called at startup. 
""" - musicbrainzngs.set_hostname(config['musicbrainz']['host'].get(unicode)) + hostname = config['musicbrainz']['host'].as_str() + musicbrainzngs.set_hostname(hostname) musicbrainzngs.set_rate_limit( config['musicbrainz']['ratelimit_interval'].as_number(), config['musicbrainz']['ratelimit'].get(int), @@ -104,7 +118,7 @@ def _flatten_artist_credit(credit): artist_sort_parts = [] artist_credit_parts = [] for el in credit: - if isinstance(el, basestring): + if isinstance(el, six.string_types): # Join phrase. artist_parts.append(el) artist_credit_parts.append(el) @@ -157,6 +171,7 @@ def track_info(recording, index=None, medium=None, medium_index=None, medium=medium, medium_index=medium_index, medium_total=medium_total, + data_source=u'MusicBrainz', data_url=track_url(recording['id']), ) @@ -172,6 +187,33 @@ def track_info(recording, index=None, medium=None, medium_index=None, if recording.get('length'): info.length = int(recording['length']) / (1000.0) + lyricist = [] + composer = [] + for work_relation in recording.get('work-relation-list', ()): + if work_relation['type'] != 'performance': + continue + for artist_relation in work_relation['work'].get( + 'artist-relation-list', ()): + if 'type' in artist_relation: + type = artist_relation['type'] + if type == 'lyricist': + lyricist.append(artist_relation['artist']['name']) + elif type == 'composer': + composer.append(artist_relation['artist']['name']) + if lyricist: + info.lyricist = u', '.join(lyricist) + if composer: + info.composer = u', '.join(composer) + + arranger = [] + for artist_relation in recording.get('artist-relation-list', ()): + if 'type' in artist_relation: + type = artist_relation['type'] + if type == 'arranger': + arranger.append(artist_relation['artist']['name']) + if arranger: + info.arranger = u', '.join(arranger) + info.decode() return info @@ -210,7 +252,12 @@ def album_info(release): for medium in release['medium-list']: disctitle = medium.get('title') format = medium.get('format') - for track 
in medium['track-list']: + + all_tracks = medium['track-list'] + if 'pregap' in medium: + all_tracks.insert(0, medium['pregap']) + + for track in all_tracks: # Basic information from the recording. index += 1 ti = track_info( @@ -222,6 +269,7 @@ def album_info(release): ) ti.disctitle = disctitle ti.media = format + ti.track_alt = track['number'] # Prefer track data, where present, over recording data. if track.get('title'): @@ -245,10 +293,12 @@ def album_info(release): mediums=len(release['medium-list']), artist_sort=artist_sort_name, artist_credit=artist_credit_name, - data_source='MusicBrainz', + data_source=u'MusicBrainz', data_url=album_url(release['id']), ) info.va = info.artist_id == VARIOUS_ARTISTS_ID + if info.va: + info.artist = config['va_name'].as_str() info.asin = release.get('asin') info.releasegroup_id = release['release-group']['id'] info.country = release.get('country') @@ -301,7 +351,7 @@ def album_info(release): return info -def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): +def match_album(artist, album, tracks=None): """Searches for a single album ("release" in MusicBrainz parlance) and returns an iterator over AlbumInfo objects. May raise a MusicBrainzAPIError. @@ -317,14 +367,16 @@ def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): # Various Artists search. criteria['arid'] = VARIOUS_ARTISTS_ID if tracks is not None: - criteria['tracks'] = str(tracks) + criteria['tracks'] = six.text_type(tracks) # Abort if we have no search terms. 
- if not any(criteria.itervalues()): + if not any(criteria.values()): return try: - res = musicbrainzngs.search_releases(limit=limit, **criteria) + log.debug(u'Searching for MusicBrainz releases with: {!r}', criteria) + res = musicbrainzngs.search_releases( + limit=config['musicbrainz']['searchlimit'].get(int), **criteria) except musicbrainzngs.MusicBrainzError as exc: raise MusicBrainzAPIError(exc, 'release search', criteria, traceback.format_exc()) @@ -336,7 +388,7 @@ def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): yield albuminfo -def match_track(artist, title, limit=SEARCH_LIMIT): +def match_track(artist, title): """Searches for a single track and returns an iterable of TrackInfo objects. May raise a MusicBrainzAPIError. """ @@ -345,11 +397,12 @@ def match_track(artist, title, limit=SEARCH_LIMIT): 'recording': title.lower().strip(), } - if not any(criteria.itervalues()): + if not any(criteria.values()): return try: - res = musicbrainzngs.search_recordings(limit=limit, **criteria) + res = musicbrainzngs.search_recordings( + limit=config['musicbrainz']['searchlimit'].get(int), **criteria) except musicbrainzngs.MusicBrainzError as exc: raise MusicBrainzAPIError(exc, 'recording search', criteria, traceback.format_exc()) @@ -362,7 +415,7 @@ def _parse_id(s): no ID can be found, return None. """ # Find the first thing that looks like a UUID/MBID. - match = re.search('[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) + match = re.search(u'[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) if match: return match.group() @@ -372,9 +425,10 @@ def album_for_id(releaseid): object or None if the album is not found. May raise a MusicBrainzAPIError. 
""" + log.debug(u'Requesting MusicBrainz release {}', releaseid) albumid = _parse_id(releaseid) if not albumid: - log.debug(u'Invalid MBID ({0}).'.format(releaseid)) + log.debug(u'Invalid MBID ({0}).', releaseid) return try: res = musicbrainzngs.get_release_by_id(albumid, @@ -383,7 +437,7 @@ def album_for_id(releaseid): log.debug(u'Album ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, 'get release by ID', albumid, + raise MusicBrainzAPIError(exc, u'get release by ID', albumid, traceback.format_exc()) return album_info(res['release']) @@ -394,7 +448,7 @@ def track_for_id(releaseid): """ trackid = _parse_id(releaseid) if not trackid: - log.debug(u'Invalid MBID ({0}).'.format(releaseid)) + log.debug(u'Invalid MBID ({0}).', releaseid) return try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) @@ -402,6 +456,6 @@ def track_for_id(releaseid): log.debug(u'Track ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, 'get recording by ID', trackid, + raise MusicBrainzAPIError(exc, u'get recording by ID', trackid, traceback.format_exc()) return track_info(res['recording']) diff --git a/lib/beets/config_default.yaml b/lib/beets/config_default.yaml old mode 100644 new mode 100755 index 78f16d05..3b037796 --- a/lib/beets/config_default.yaml +++ b/lib/beets/config_default.yaml @@ -6,6 +6,7 @@ import: copy: yes move: no link: no + hardlink: no delete: no resume: ask incremental: no @@ -22,9 +23,13 @@ import: flat: no group_albums: no pretend: false + search_ids: [] + duplicate_action: ask clutter: ["Thumbs.DB", ".DS_Store"] -ignore: [".*", "*~", "System Volume Information"] +ignore: [".*", "*~", "System Volume Information", "lost+found"] +ignore_hidden: yes + replace: '[\\/]': _ '^\.': _ @@ -41,24 +46,35 @@ max_filename_length: 0 plugins: [] pluginpath: [] threaded: yes -color: yes timeout: 5.0 per_disc_numbering: no -verbose: no 
-terminal_encoding: utf8 +verbose: 0 +terminal_encoding: original_date: no id3v23: no +va_name: "Various Artists" ui: terminal_width: 80 length_diff_thresh: 10.0 + color: yes + colors: + text_success: green + text_warning: yellow + text_error: red + text_highlight: red + text_highlight_minor: lightgray + action_default: turquoise + action: blue -list_format_item: $artist - $album - $title -list_format_album: $albumartist - $album +format_item: $artist - $album - $title +format_album: $albumartist - $album time_format: '%Y-%m-%d %H:%M:%S' +format_raw_length: no sort_album: albumartist+ album+ sort_item: artist+ album+ disc+ track+ +sort_case_insensitive: yes paths: default: $albumartist/$album%aunique{}/$track $title @@ -71,6 +87,7 @@ musicbrainz: host: musicbrainz.org ratelimit: 1 ratelimit_interval: 1.0 + searchlimit: 5 match: strong_rec_thresh: 0.04 diff --git a/lib/beets/dbcore/__init__.py b/lib/beets/dbcore/__init__.py old mode 100644 new mode 100755 index c364fdfc..689e7202 --- a/lib/beets/dbcore/__init__.py +++ b/lib/beets/dbcore/__init__.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -15,11 +16,14 @@ """DBCore is an abstract database package that forms the basis for beets' Library. 
""" +from __future__ import division, absolute_import, print_function + from .db import Model, Database from .query import Query, FieldQuery, MatchQuery, AndQuery, OrQuery from .types import Type from .queryparse import query_from_strings from .queryparse import sort_from_strings from .queryparse import parse_sorted_query +from .query import InvalidQueryError # flake8: noqa diff --git a/lib/beets/dbcore/db.py b/lib/beets/dbcore/db.py old mode 100644 new mode 100755 index 0c786daa..6b0ed8b4 --- a/lib/beets/dbcore/db.py +++ b/lib/beets/dbcore/db.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,6 +15,8 @@ """The central Model and Database constructs for DBCore. """ +from __future__ import division, absolute_import, print_function + import time import os from collections import defaultdict @@ -24,14 +27,16 @@ import collections import beets from beets.util.functemplate import Template +from beets.util import py3_path from beets.dbcore import types from .query import MatchQuery, NullSort, TrueQuery +import six class FormattedMapping(collections.Mapping): """A `dict`-like formatted view of a model. - The accessor `mapping[key]` returns the formated version of + The accessor `mapping[key]` returns the formatted version of `model[key]` as a unicode string. 
If `for_path` is true, all path separators in the formatted values @@ -63,10 +68,10 @@ class FormattedMapping(collections.Mapping): def _get_formatted(self, model, key): value = model._type(key).format(model.get(key)) if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') + value = value.decode('utf-8', 'ignore') if self.for_path: - sep_repl = beets.config['path_sep_replace'].get(unicode) + sep_repl = beets.config['path_sep_replace'].as_str() for sep in (os.path.sep, os.path.altsep): if sep: value = value.replace(sep, sep_repl) @@ -173,9 +178,9 @@ class Model(object): ordinary construction are bypassed. """ obj = cls(db) - for key, value in fixed_values.iteritems(): + for key, value in fixed_values.items(): obj._values_fixed[key] = cls._type(key).from_sql(value) - for key, value in flex_values.iteritems(): + for key, value in flex_values.items(): obj._values_flex[key] = cls._type(key).from_sql(value) return obj @@ -197,20 +202,22 @@ class Model(object): exception is raised otherwise. """ if not self._db: - raise ValueError('{0} has no database'.format(type(self).__name__)) + raise ValueError( + u'{0} has no database'.format(type(self).__name__) + ) if need_id and not self.id: - raise ValueError('{0} has no id'.format(type(self).__name__)) + raise ValueError(u'{0} has no id'.format(type(self).__name__)) # Essential field accessors. @classmethod - def _type(self, key): + def _type(cls, key): """Get the type of a field, a `Type` instance. If the field has no explicit type, it is given the base `Type`, which does no conversion. """ - return self._fields.get(key) or self._types.get(key) or types.DEFAULT + return cls._fields.get(key) or cls._types.get(key) or types.DEFAULT def __getitem__(self, key): """Get the value for a field. Raise a KeyError if the field is @@ -251,23 +258,30 @@ class Model(object): del self._values_flex[key] self._dirty.add(key) # Mark for dropping on store. elif key in self._getters(): # Computed. 
- raise KeyError('computed field {0} cannot be deleted'.format(key)) + raise KeyError(u'computed field {0} cannot be deleted'.format(key)) elif key in self._fields: # Fixed. - raise KeyError('fixed field {0} cannot be deleted'.format(key)) + raise KeyError(u'fixed field {0} cannot be deleted'.format(key)) else: - raise KeyError('no such field {0}'.format(key)) + raise KeyError(u'no such field {0}'.format(key)) def keys(self, computed=False): """Get a list of available field names for this object. The `computed` parameter controls whether computed (plugin-provided) fields are included in the key list. """ - base_keys = list(self._fields) + self._values_flex.keys() + base_keys = list(self._fields) + list(self._values_flex.keys()) if computed: - return base_keys + self._getters().keys() + return base_keys + list(self._getters().keys()) else: return base_keys + @classmethod + def all_keys(cls): + """Get a list of available keys for objects of this type. + Includes fixed and computed fields. + """ + return list(cls._fields) + list(cls._getters().keys()) + # Act like a dictionary. def update(self, values): @@ -307,12 +321,12 @@ class Model(object): def __getattr__(self, key): if key.startswith('_'): - raise AttributeError('model has no attribute {0!r}'.format(key)) + raise AttributeError(u'model has no attribute {0!r}'.format(key)) else: try: return self[key] except KeyError: - raise AttributeError('no such field {0!r}'.format(key)) + raise AttributeError(u'no such field {0!r}'.format(key)) def __setattr__(self, key, value): if key.startswith('_'): @@ -328,15 +342,19 @@ class Model(object): # Database interaction (CRUD methods). - def store(self): + def store(self, fields=None): """Save the object's metadata into the library database. + :param fields: the fields to be stored. If not specified, all fields + will be. """ + if fields is None: + fields = self._fields self._check_db() # Build assignments for query. 
assignments = [] subvars = [] - for key in self._fields: + for key in fields: if key != 'id' and key in self._dirty: self._dirty.remove(key) assignments.append(key + '=?') @@ -379,7 +397,7 @@ class Model(object): """ self._check_db() stored_obj = self._db._get(type(self), self.id) - assert stored_obj is not None, "object {0} not in DB".format(self.id) + assert stored_obj is not None, u"object {0} not in DB".format(self.id) self._values_fixed = {} self._values_flex = {} self.update(dict(stored_obj)) @@ -440,7 +458,7 @@ class Model(object): separators will be added to the template. """ # Perform substitution. - if isinstance(template, basestring): + if isinstance(template, six.string_types): template = Template(template) return template.substitute(self.formatted(for_path), self._template_funcs()) @@ -451,11 +469,16 @@ class Model(object): def _parse(cls, key, string): """Parse a string as a value for the given key. """ - if not isinstance(string, basestring): - raise TypeError("_parse() argument must be a string") + if not isinstance(string, six.string_types): + raise TypeError(u"_parse() argument must be a string") return cls._type(key).parse(string) + def set_parse(self, key, string): + """Set the object's key to a value represented by a string. + """ + self[key] = self._parse(key, string) + # Database controller and supporting interfaces. @@ -576,6 +599,11 @@ class Results(object): return self._row_count def __nonzero__(self): + """Does this result contain any objects? + """ + return self.__bool__() + + def __bool__(self): """Does this result contain any objects? 
""" return bool(len(self)) @@ -592,10 +620,10 @@ class Results(object): it = iter(self) try: for i in range(n): - it.next() - return it.next() + next(it) + return next(it) except StopIteration: - raise IndexError('result index {0} out of range'.format(n)) + raise IndexError(u'result index {0} out of range'.format(n)) def get(self): """Return the first matching object, or None if no objects @@ -603,7 +631,7 @@ class Results(object): """ it = iter(self) try: - return it.next() + return next(it) except StopIteration: return None @@ -668,8 +696,9 @@ class Database(object): """The Model subclasses representing tables in this database. """ - def __init__(self, path): + def __init__(self, path, timeout=5.0): self.path = path + self.timeout = timeout self._connections = {} self._tx_stacks = defaultdict(list) @@ -704,18 +733,36 @@ class Database(object): if thread_id in self._connections: return self._connections[thread_id] else: - # Make a new connection. - conn = sqlite3.connect( - self.path, - timeout=beets.config['timeout'].as_number(), - ) - - # Access SELECT results like dictionaries. - conn.row_factory = sqlite3.Row - + conn = self._create_connection() self._connections[thread_id] = conn return conn + def _create_connection(self): + """Create a SQLite connection to the underlying database. + + Makes a new connection every time. If you need to configure the + connection settings (e.g., add custom functions), override this + method. + """ + # Make a new connection. The `sqlite3` module can't use + # bytestring paths here on Python 3, so we need to + # provide a `str` using `py3_path`. + conn = sqlite3.connect( + py3_path(self.path), timeout=self.timeout + ) + + # Access SELECT results like dictionaries. + conn.row_factory = sqlite3.Row + return conn + + def _close(self): + """Close the all connections to the underlying SQLite database + from all threads. This does not render the database object + unusable; new connections can still be opened on demand. 
+ """ + with self._shared_map_lock: + self._connections.clear() + @contextlib.contextmanager def _tx_stack(self): """A context manager providing access to the current thread's diff --git a/lib/beets/dbcore/query.py b/lib/beets/dbcore/query.py old mode 100644 new mode 100755 index 5a116eb2..470ca2ac --- a/lib/beets/dbcore/query.py +++ b/lib/beets/dbcore/query.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,10 +15,49 @@ """The Query type hierarchy for DBCore. """ +from __future__ import division, absolute_import, print_function + import re -from operator import attrgetter +from operator import mul from beets import util from datetime import datetime, timedelta +import unicodedata +from functools import reduce +import six + +if not six.PY2: + buffer = memoryview # sqlite won't accept memoryview in python 2 + + +class ParsingError(ValueError): + """Abstract class for any unparseable user-requested album/query + specification. + """ + + +class InvalidQueryError(ParsingError): + """Represent any kind of invalid query. + + The query should be a unicode string or a list, which will be space-joined. + """ + def __init__(self, query, explanation): + if isinstance(query, list): + query = " ".join(query) + message = u"'{0}': {1}".format(query, explanation) + super(InvalidQueryError, self).__init__(message) + + +class InvalidQueryArgumentTypeError(ParsingError): + """Represent a query argument that could not be converted as expected. + + It exists to be caught in upper stack levels so a meaningful (i.e. with the + query) InvalidQueryError can be raised. 
+ """ + def __init__(self, what, expected, detail=None): + message = u"'{0}' is not {1}".format(what, expected) + if detail: + message = u"{0}: {1}".format(message, detail) + super(InvalidQueryArgumentTypeError, self).__init__(message) class Query(object): @@ -25,9 +65,8 @@ class Query(object): """ def clause(self): """Generate an SQLite expression implementing the query. - Return a clause string, a sequence of substitution values for - the clause, and a Query object representing the "remainder" - Returns (clause, subvals) where clause is a valid sqlite + + Return (clause, subvals) where clause is a valid sqlite WHERE clause implementing the query and subvals is a list of items to be substituted for ?s in the clause. """ @@ -39,6 +78,15 @@ class Query(object): """ raise NotImplementedError + def __repr__(self): + return "{0.__class__.__name__}()".format(self) + + def __eq__(self, other): + return type(self) == type(other) + + def __hash__(self): + return 0 + class FieldQuery(Query): """An abstract query that searches in a specific field for a @@ -72,6 +120,17 @@ class FieldQuery(Query): def match(self, item): return self.value_match(self.pattern, item.get(self.field)) + def __repr__(self): + return ("{0.__class__.__name__}({0.field!r}, {0.pattern!r}, " + "{0.fast})".format(self)) + + def __eq__(self, other): + return super(FieldQuery, self).__eq__(other) and \ + self.field == other.field and self.pattern == other.pattern + + def __hash__(self): + return hash((self.field, hash(self.pattern))) + class MatchQuery(FieldQuery): """A query that looks for exact matches in an item field.""" @@ -86,19 +145,21 @@ class MatchQuery(FieldQuery): class NoneQuery(FieldQuery): def __init__(self, field, fast=True): - self.field = field - self.fast = fast + super(NoneQuery, self).__init__(field, None, fast) def col_clause(self): return self.field + " IS NULL", () @classmethod - def match(self, item): + def match(cls, item): try: - return item[self.field] is None + return 
item[cls.field] is None except KeyError: return True + def __repr__(self): + return "{0.__class__.__name__}({0.field!r}, {0.fast})".format(self) + class StringFieldQuery(FieldQuery): """A FieldQuery that converts values to strings before matching @@ -139,15 +200,31 @@ class SubstringQuery(StringFieldQuery): class RegexpQuery(StringFieldQuery): """A query that matches a regular expression in a specific item field. + + Raises InvalidQueryError when the pattern is not a valid regular + expression. """ + def __init__(self, field, pattern, fast=True): + super(RegexpQuery, self).__init__(field, pattern, fast) + pattern = self._normalize(pattern) + try: + self.pattern = re.compile(self.pattern) + except re.error as exc: + # Invalid regular expression. + raise InvalidQueryArgumentTypeError(pattern, + u"a regular expression", + format(exc)) + + @staticmethod + def _normalize(s): + """Normalize a Unicode string's representation (used on both + patterns and matched values). + """ + return unicodedata.normalize('NFC', s) + @classmethod def string_match(cls, pattern, value): - try: - res = re.search(pattern, value) - except re.error: - # Invalid regular expression. - return False - return res is not None + return pattern.search(cls._normalize(value)) is not None class BooleanQuery(MatchQuery): @@ -156,28 +233,26 @@ class BooleanQuery(MatchQuery): """ def __init__(self, field, pattern, fast=True): super(BooleanQuery, self).__init__(field, pattern, fast) - if isinstance(pattern, basestring): + if isinstance(pattern, six.string_types): self.pattern = util.str2bool(pattern) self.pattern = int(self.pattern) class BytesQuery(MatchQuery): """Match a raw bytes field (i.e., a path). This is a necessary hack - to work around the `sqlite3` module's desire to treat `str` and + to work around the `sqlite3` module's desire to treat `bytes` and `unicode` equivalently in Python 2. Always use this query instead of `MatchQuery` when matching on BLOB values. 
""" def __init__(self, field, pattern): super(BytesQuery, self).__init__(field, pattern) - # Use a buffer representation of the pattern for SQLite + # Use a buffer/memoryview representation of the pattern for SQLite # matching. This instructs SQLite to treat the blob as binary # rather than encoded Unicode. - if isinstance(self.pattern, basestring): - # Implicitly coerce Unicode strings to their bytes - # equivalents. - if isinstance(self.pattern, unicode): - self.pattern = self.pattern.encode('utf8') + if isinstance(self.pattern, (six.text_type, bytes)): + if isinstance(self.pattern, six.text_type): + self.pattern = self.pattern.encode('utf-8') self.buf_pattern = buffer(self.pattern) elif isinstance(self.pattern, buffer): self.buf_pattern = self.pattern @@ -191,19 +266,26 @@ class NumericQuery(FieldQuery): """Matches numeric fields. A syntax using Ruby-style range ellipses (``..``) lets users specify one- or two-sided ranges. For example, ``year:2001..`` finds music released since the turn of the century. + + Raises InvalidQueryError when the pattern does not represent an int or + a float. """ def _convert(self, s): - """Convert a string to a numeric type (float or int). If the - string cannot be converted, return None. + """Convert a string to a numeric type (float or int). + + Return None if `s` is empty. + Raise an InvalidQueryError if the string cannot be converted. """ # This is really just a bit of fun premature optimization. 
+ if not s: + return None try: return int(s) except ValueError: try: return float(s) except ValueError: - return None + raise InvalidQueryArgumentTypeError(s, u"an int or a float") def __init__(self, field, pattern, fast=True): super(NumericQuery, self).__init__(field, pattern, fast) @@ -224,7 +306,7 @@ class NumericQuery(FieldQuery): if self.field not in item: return False value = item[self.field] - if isinstance(value, basestring): + if isinstance(value, six.string_types): value = self._convert(value) if self.point is not None: @@ -248,7 +330,7 @@ class NumericQuery(FieldQuery): elif self.rangemax is not None: return u'{0} <= ?'.format(self.field), (self.rangemax,) else: - return '1', () + return u'1', () class CollectionQuery(Query): @@ -273,7 +355,7 @@ class CollectionQuery(Query): return item in self.subqueries def clause_with_joiner(self, joiner): - """Returns a clause created by joining together the clauses of + """Return a clause created by joining together the clauses of all subqueries with the string joiner (padded by spaces). """ clause_parts = [] @@ -288,6 +370,19 @@ class CollectionQuery(Query): clause = (' ' + joiner + ' ').join(clause_parts) return clause, subvals + def __repr__(self): + return "{0.__class__.__name__}({0.subqueries!r})".format(self) + + def __eq__(self, other): + return super(CollectionQuery, self).__eq__(other) and \ + self.subqueries == other.subqueries + + def __hash__(self): + """Since subqueries are mutable, this object should not be hashable. + However and for conveniences purposes, it can be hashed. 
+ """ + return reduce(mul, map(hash, self.subqueries), 1) + class AnyFieldQuery(CollectionQuery): """A query that matches if a given FieldQuery subclass matches in @@ -313,6 +408,17 @@ class AnyFieldQuery(CollectionQuery): return True return False + def __repr__(self): + return ("{0.__class__.__name__}({0.pattern!r}, {0.fields!r}, " + "{0.query_class.__name__})".format(self)) + + def __eq__(self, other): + return super(AnyFieldQuery, self).__eq__(other) and \ + self.query_class == other.query_class + + def __hash__(self): + return hash((self.pattern, tuple(self.fields), self.query_class)) + class MutableCollectionQuery(CollectionQuery): """A collection query whose subqueries may be modified after the @@ -343,6 +449,36 @@ class OrQuery(MutableCollectionQuery): return any([q.match(item) for q in self.subqueries]) +class NotQuery(Query): + """A query that matches the negation of its `subquery`, as a shorcut for + performing `not(subquery)` without using regular expressions. + """ + def __init__(self, subquery): + self.subquery = subquery + + def clause(self): + clause, subvals = self.subquery.clause() + if clause: + return 'not ({0})'.format(clause), subvals + else: + # If there is no clause, there is nothing to negate. All the logic + # is handled by match() for slow queries. + return clause, subvals + + def match(self, item): + return not self.subquery.match(item) + + def __repr__(self): + return "{0.__class__.__name__}({0.subquery!r})".format(self) + + def __eq__(self, other): + return super(NotQuery, self).__eq__(other) and \ + self.subquery == other.subquery + + def __hash__(self): + return hash(('not', hash(self.subquery))) + + class TrueQuery(Query): """A query that always matches.""" def clause(self): @@ -367,13 +503,13 @@ def _to_epoch_time(date): """Convert a `datetime` object to an integer number of seconds since the (local) Unix epoch. 
""" - epoch = datetime.fromtimestamp(0) - delta = date - epoch - try: + if hasattr(date, 'timestamp'): + # The `timestamp` method exists on Python 3.3+. + return int(date.timestamp()) + else: + epoch = datetime.fromtimestamp(0) + delta = date - epoch return int(delta.total_seconds()) - except AttributeError: - # datetime.timedelta.total_seconds() is not available on Python 2.6 - return delta.seconds + delta.days * 24 * 3600 def _parse_periods(pattern): @@ -405,7 +541,7 @@ class Period(object): precision (a string, one of "year", "month", or "day"). """ if precision not in Period.precisions: - raise ValueError('Invalid precision ' + str(precision)) + raise ValueError(u'Invalid precision {0}'.format(precision)) self.date = date self.precision = precision @@ -445,7 +581,7 @@ class Period(object): elif 'day' == precision: return date + timedelta(days=1) else: - raise ValueError('unhandled precision ' + str(precision)) + raise ValueError(u'unhandled precision {0}'.format(precision)) class DateInterval(object): @@ -457,7 +593,7 @@ class DateInterval(object): def __init__(self, start, end): if start is not None and end is not None and not start < end: - raise ValueError("start date {0} is not before end date {1}" + raise ValueError(u"start date {0} is not before end date {1}" .format(start, end)) self.start = start self.end = end @@ -478,7 +614,7 @@ class DateInterval(object): return True def __str__(self): - return'[{0}, {1})'.format(self.start, self.end) + return '[{0}, {1})'.format(self.start, self.end) class DateQuery(FieldQuery): @@ -496,6 +632,8 @@ class DateQuery(FieldQuery): self.interval = DateInterval.from_periods(start, end) def match(self, item): + if self.field not in item: + return False timestamp = float(item[self.field]) date = datetime.utcfromtimestamp(timestamp) return self.interval.contains(date) @@ -523,6 +661,33 @@ class DateQuery(FieldQuery): return clause, subvals +class DurationQuery(NumericQuery): + """NumericQuery that allow human-friendly (M:SS) 
time interval formats. + + Converts the range(s) to a float value, and delegates on NumericQuery. + + Raises InvalidQueryError when the pattern does not represent an int, float + or M:SS time interval. + """ + def _convert(self, s): + """Convert a M:SS or numeric string to a float. + + Return None if `s` is empty. + Raise an InvalidQueryError if the string cannot be converted. + """ + if not s: + return None + try: + return util.raw_seconds_short(s) + except ValueError: + try: + return float(s) + except ValueError: + raise InvalidQueryArgumentTypeError( + s, + u"a M:SS string or a float") + + # Sorting. class Sort(object): @@ -547,6 +712,12 @@ class Sort(object): """ return False + def __hash__(self): + return 0 + + def __eq__(self, other): + return type(self) == type(other) + class MultipleSort(Sort): """Sort that encapsulates multiple sub-sorts. @@ -606,38 +777,67 @@ class MultipleSort(Sort): return items def __repr__(self): - return u'MultipleSort({0})'.format(repr(self.sorts)) + return 'MultipleSort({!r})'.format(self.sorts) + + def __hash__(self): + return hash(tuple(self.sorts)) + + def __eq__(self, other): + return super(MultipleSort, self).__eq__(other) and \ + self.sorts == other.sorts class FieldSort(Sort): """An abstract sort criterion that orders by a specific field (of any kind). """ - def __init__(self, field, ascending=True): + def __init__(self, field, ascending=True, case_insensitive=True): self.field = field self.ascending = ascending + self.case_insensitive = case_insensitive def sort(self, objs): # TODO: Conversion and null-detection here. In Python 3, # comparisons with None fail. We should also support flexible # attributes with different types without falling over. 
- return sorted(objs, key=attrgetter(self.field), - reverse=not self.ascending) + + def key(item): + field_val = item.get(self.field, '') + if self.case_insensitive and isinstance(field_val, six.text_type): + field_val = field_val.lower() + return field_val + + return sorted(objs, key=key, reverse=not self.ascending) def __repr__(self): - return u'<{0}: {1}{2}>'.format( + return '<{0}: {1}{2}>'.format( type(self).__name__, self.field, '+' if self.ascending else '-', ) + def __hash__(self): + return hash((self.field, self.ascending)) + + def __eq__(self, other): + return super(FieldSort, self).__eq__(other) and \ + self.field == other.field and \ + self.ascending == other.ascending + class FixedFieldSort(FieldSort): """Sort object to sort on a fixed field. """ def order_clause(self): order = "ASC" if self.ascending else "DESC" - return "{0} {1}".format(self.field, order) + if self.case_insensitive: + field = '(CASE ' \ + 'WHEN TYPEOF({0})="text" THEN LOWER({0}) ' \ + 'WHEN TYPEOF({0})="blob" THEN LOWER({0}) ' \ + 'ELSE {0} END)'.format(self.field) + else: + field = self.field + return "{0} {1}".format(field, order) class SlowFieldSort(FieldSort): @@ -650,5 +850,17 @@ class SlowFieldSort(FieldSort): class NullSort(Sort): """No sorting. Leave results unsorted.""" - def sort(items): + def sort(self, items): return items + + def __nonzero__(self): + return self.__bool__() + + def __bool__(self): + return False + + def __eq__(self, other): + return type(self) == type(other) or other is None + + def __hash__(self): + return 0 diff --git a/lib/beets/dbcore/queryparse.py b/lib/beets/dbcore/queryparse.py old mode 100644 new mode 100755 index 90963696..bc9cc77e --- a/lib/beets/dbcore/queryparse.py +++ b/lib/beets/dbcore/queryparse.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. 
# # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,13 +15,17 @@ """Parsing of strings into DBCore queries. """ +from __future__ import division, absolute_import, print_function + import re import itertools from . import query - +import beets PARSE_QUERY_PART_REGEX = re.compile( # Non-capturing optional segment for the keyword. + r'(-|\^)?' # Negation prefixes. + r'(?:' r'(\S+?)' # The field key. r'(? (None, 'stapler', SubstringQuery) - 'color:red' -> ('color', 'red', SubstringQuery) - ':^Quiet' -> (None, '^Quiet', RegexpQuery) - 'color::b..e' -> ('color', 'b..e', RegexpQuery) + The function returns a tuple consisting of: + - The field name: a string or None if it's not present. + - The pattern, a string. + - The query class to use, which inherits from the base + :class:`Query` type. + - A negation flag, a bool. - Prefixes may be "escaped" with a backslash to disable the keying - behavior. + The three optional parameters determine which query class is used (i.e., + the third return value). They are: + - `query_classes`, which maps field names to query classes. These + are used when no explicit prefix is present. + - `prefixes`, which maps prefix strings to query classes. + - `default_class`, the fallback when neither the field nor a prefix + indicates a query class. + + So the precedence for determining which query class to return is: + prefix, followed by field, and finally the default. + + For example, assuming the `:` prefix is used for `RegexpQuery`: + - `'stapler'` -> `(None, 'stapler', SubstringQuery, False)` + - `'color:red'` -> `('color', 'red', SubstringQuery, False)` + - `':^Quiet'` -> `(None, '^Quiet', RegexpQuery, False)`, because + the `^` follows the `:` + - `'color::b..e'` -> `('color', 'b..e', RegexpQuery, False)` + - `'-color:red'` -> `('color', 'red', SubstringQuery, True)` """ + # Apply the regular expression and extract the components. 
part = part.strip() match = PARSE_QUERY_PART_REGEX.match(part) - assert match # Regex should always match. - key = match.group(1) - term = match.group(2).replace('\:', ':') + assert match # Regex should always match + negate = bool(match.group(1)) + key = match.group(2) + term = match.group(3).replace('\:', ':') - # Match the search term against the list of prefixes. + # Check whether there's a prefix in the query and use the + # corresponding query type. for pre, query_class in prefixes.items(): if term.startswith(pre): - return key, term[len(pre):], query_class + return key, term[len(pre):], query_class, negate - # No matching prefix: use type-based or fallback/default query. + # No matching prefix, so use either the query class determined by + # the field or the default as a fallback. query_class = query_classes.get(key, default_class) - return key, term, query_class + return key, term, query_class, negate def construct_query_part(model_cls, prefixes, query_part): - """Create a query from a single query component, `query_part`, for - querying instances of `model_cls`. Return a `Query` instance. + """Parse a *query part* string and return a :class:`Query` object. + + :param model_cls: The :class:`Model` class that this is a query for. + This is used to determine the appropriate query types for the + model's fields. + :param prefixes: A map from prefix strings to :class:`Query` types. + :param query_part: The string to parse. + + See the documentation for `parse_query_part` for more information on + query part syntax. """ - # Shortcut for empty query parts. + # A shortcut for empty query parts. if not query_part: return query.TrueQuery() - # Get the query classes for each possible field. + # Use `model_cls` to build up a map from field names to `Query` + # classes. query_classes = {} for k, t in itertools.chain(model_cls._fields.items(), model_cls._types.items()): query_classes[k] = t.query # Parse the string. 
- key, pattern, query_class = \ + key, pattern, query_class, negate = \ parse_query_part(query_part, query_classes, prefixes) - # No key specified. + # If there's no key (field name) specified, this is a "match + # anything" query. if key is None: if issubclass(query_class, query.FieldQuery): # The query type matches a specific field, but none was # specified. So we use a version of the query that matches # any field. - return query.AnyFieldQuery(pattern, model_cls._search_fields, - query_class) + q = query.AnyFieldQuery(pattern, model_cls._search_fields, + query_class) + if negate: + return query.NotQuery(q) + else: + return q else: - # Other query type. - return query_class(pattern) + # Non-field query type. + if negate: + return query.NotQuery(query_class(pattern)) + else: + return query_class(pattern) + # Otherwise, this must be a `FieldQuery`. Use the field name to + # construct the query object. key = key.lower() - return query_class(key.lower(), pattern, key in model_cls._fields) + q = query_class(key.lower(), pattern, key in model_cls._fields) + if negate: + return query.NotQuery(q) + return q def query_from_strings(query_cls, model_cls, prefixes, query_parts): @@ -136,13 +185,15 @@ def construct_sort_part(model_cls, part): assert direction in ('+', '-'), "part must end with + or -" is_ascending = direction == '+' + case_insensitive = beets.config['sort_case_insensitive'].get(bool) if field in model_cls._sorts: - sort = model_cls._sorts[field](model_cls, is_ascending) + sort = model_cls._sorts[field](model_cls, is_ascending, + case_insensitive) elif field in model_cls._fields: - sort = query.FixedFieldSort(field, is_ascending) + sort = query.FixedFieldSort(field, is_ascending, case_insensitive) else: # Flexible or computed. 
- sort = query.SlowFieldSort(field, is_ascending) + sort = query.SlowFieldSort(field, is_ascending, case_insensitive) return sort @@ -150,31 +201,50 @@ def sort_from_strings(model_cls, sort_parts): """Create a `Sort` from a list of sort criteria (strings). """ if not sort_parts: - return query.NullSort() + sort = query.NullSort() + elif len(sort_parts) == 1: + sort = construct_sort_part(model_cls, sort_parts[0]) else: sort = query.MultipleSort() for part in sort_parts: sort.add_sort(construct_sort_part(model_cls, part)) - return sort + return sort -def parse_sorted_query(model_cls, parts, prefixes={}, - query_cls=query.AndQuery): +def parse_sorted_query(model_cls, parts, prefixes={}): """Given a list of strings, create the `Query` and `Sort` that they represent. """ # Separate query token and sort token. query_parts = [] sort_parts = [] - for part in parts: - if part.endswith((u'+', u'-')) and u':' not in part: - sort_parts.append(part) - else: - query_parts.append(part) - # Parse each. - q = query_from_strings( - query_cls, model_cls, prefixes, query_parts - ) + # Split up query in to comma-separated subqueries, each representing + # an AndQuery, which need to be joined together in one OrQuery + subquery_parts = [] + for part in parts + [u',']: + if part.endswith(u','): + # Ensure we can catch "foo, bar" as well as "foo , bar" + last_subquery_part = part[:-1] + if last_subquery_part: + subquery_parts.append(last_subquery_part) + # Parse the subquery in to a single AndQuery + # TODO: Avoid needlessly wrapping AndQueries containing 1 subquery? + query_parts.append(query_from_strings( + query.AndQuery, model_cls, prefixes, subquery_parts + )) + del subquery_parts[:] + else: + # Sort parts (1) end in + or -, (2) don't have a field, and + # (3) consist of more than just the + or -. 
+ if part.endswith((u'+', u'-')) \ + and u':' not in part \ + and len(part) > 1: + sort_parts.append(part) + else: + subquery_parts.append(part) + + # Avoid needlessly wrapping single statements in an OR + q = query.OrQuery(query_parts) if len(query_parts) > 1 else query_parts[0] s = sort_from_strings(model_cls, sort_parts) return q, s diff --git a/lib/beets/dbcore/types.py b/lib/beets/dbcore/types.py old mode 100644 new mode 100755 index 82346e70..b909904b --- a/lib/beets/dbcore/types.py +++ b/lib/beets/dbcore/types.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,8 +15,14 @@ """Representation of type information for DBCore model fields. """ +from __future__ import division, absolute_import, print_function + from . import query from beets.util import str2bool +import six + +if not six.PY2: + buffer = memoryview # sqlite won't accept memoryview in python 2 # Abstract base. @@ -34,7 +41,7 @@ class Type(object): """The `Query` subclass to be used when querying the field. """ - model_type = unicode + model_type = six.text_type """The Python type that is used to represent the value in the model. The model is guaranteed to return a value of this type if the field @@ -58,9 +65,9 @@ class Type(object): if value is None: value = u'' if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') + value = value.decode('utf-8', 'ignore') - return unicode(value) + return six.text_type(value) def parse(self, string): """Parse a (possibly human-written) string and return the @@ -93,13 +100,13 @@ class Type(object): http://www.sqlite.org/datatype3.html https://docs.python.org/2/library/sqlite3.html#sqlite-and-python-types - Flexible fields have the type afinity `TEXT`. 
This means the - `sql_value` is either a `buffer` or a `unicode` object` and the - method must handle these in addition. + Flexible fields have the type affinity `TEXT`. This means the + `sql_value` is either a `buffer`/`memoryview` or a `unicode` object` + and the method must handle these in addition. """ if isinstance(sql_value, buffer): - sql_value = bytes(sql_value).decode('utf8', 'ignore') - if isinstance(sql_value, unicode): + sql_value = bytes(sql_value).decode('utf-8', 'ignore') + if isinstance(sql_value, six.text_type): return self.parse(sql_value) else: return self.normalize(sql_value) @@ -191,7 +198,7 @@ class Boolean(Type): model_type = bool def format(self, value): - return unicode(bool(value)) + return six.text_type(bool(value)) def parse(self, string): return str2bool(string) diff --git a/lib/beets/importer.py b/lib/beets/importer.py old mode 100644 new mode 100755 index 4a7bd997..690a499f --- a/lib/beets/importer.py +++ b/lib/beets/importer.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -12,14 +13,14 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. +from __future__ import division, absolute_import, print_function + """Provides the basic, interface-agnostic workflow for importing and autotagging music files. 
""" -from __future__ import print_function import os import re -import logging import pickle import itertools from collections import defaultdict @@ -27,7 +28,9 @@ from tempfile import mkdtemp from bisect import insort, bisect_left from contextlib import contextmanager import shutil +import time +from beets import logging from beets import autotag from beets import library from beets import dbcore @@ -40,12 +43,13 @@ from enum import Enum from beets import mediafile action = Enum('action', - ['SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID', - 'ALBUMS']) + ['SKIP', 'ASIS', 'TRACKS', 'APPLY', 'ALBUMS', 'RETAG']) +# The RETAG action represents "don't apply any match, but do record +# new metadata". It's not reachable via the standard command prompt but +# can be used by plugins. QUEUE_SIZE = 128 SINGLE_ARTIST_THRESH = 0.25 -VARIOUS_ARTISTS = u'Various Artists' PROGRESS_KEY = 'tagprogress' HISTORY_KEY = 'taghistory' @@ -64,24 +68,24 @@ class ImportAbort(Exception): def _open_state(): """Reads the state file, returning a dictionary.""" try: - with open(config['statefile'].as_filename()) as f: + with open(config['statefile'].as_filename(), 'rb') as f: return pickle.load(f) except Exception as exc: # The `pickle` module can emit all sorts of exceptions during # unpickling, including ImportError. We use a catch-all # exception to avoid enumerating them all (the docs don't even have a # full list!). 
- log.debug(u'state file could not be read: {0}'.format(exc)) + log.debug(u'state file could not be read: {0}', exc) return {} def _save_state(state): """Writes the state dictionary out to disk.""" try: - with open(config['statefile'].as_filename(), 'w') as f: + with open(config['statefile'].as_filename(), 'wb') as f: pickle.dump(state, f) except IOError as exc: - log.error(u'state file could not be written: {0}'.format(exc)) + log.error(u'state file could not be written: {0}', exc) # Utilities for reading and writing the beets progress file, which @@ -174,22 +178,28 @@ class ImportSession(object): """Controls an import action. Subclasses should implement methods to communicate with the user or otherwise make decisions. """ - def __init__(self, lib, logfile, paths, query): - """Create a session. `lib` is a Library object. `logfile` is a - file-like object open for writing or None if no logging is to be - performed. Either `paths` or `query` is non-null and indicates + def __init__(self, lib, loghandler, paths, query): + """Create a session. `lib` is a Library object. `loghandler` is a + logging.Handler. Either `paths` or `query` is non-null and indicates the source of files to be imported. """ self.lib = lib - self.logfile = logfile + self.logger = self._setup_logging(loghandler) self.paths = paths self.query = query - self.seen_idents = set() self._is_resuming = dict() # Normalize the paths. if self.paths: - self.paths = map(normpath, self.paths) + self.paths = list(map(normpath, self.paths)) + + def _setup_logging(self, loghandler): + logger = logging.getLogger(__name__) + logger.propagate = False + if not loghandler: + loghandler = logging.NullHandler() + logger.handlers = [loghandler] + return logger def set_config(self, config): """Set `config` property from global import config and make @@ -210,13 +220,19 @@ class ImportSession(object): iconfig['resume'] = False iconfig['incremental'] = False - # Copy, move, and link are mutually exclusive. 
+ # Copy, move, link, and hardlink are mutually exclusive. if iconfig['move']: iconfig['copy'] = False iconfig['link'] = False + iconfig['hardlink'] = False elif iconfig['link']: iconfig['copy'] = False iconfig['move'] = False + iconfig['hardlink'] = False + elif iconfig['hardlink']: + iconfig['copy'] = False + iconfig['move'] = False + iconfig['link'] = False # Only delete when copying. if not iconfig['copy']: @@ -225,13 +241,10 @@ class ImportSession(object): self.want_resume = config['resume'].as_choice([True, False, 'ask']) def tag_log(self, status, paths): - """Log a message about a given album to logfile. The status should - reflect the reason the album couldn't be tagged. + """Log a message about a given album to the importer log. The status + should reflect the reason the album couldn't be tagged. """ - if self.logfile: - print(u'{0} {1}'.format(status, displayable_path(paths)), - file=self.logfile) - self.logfile.flush() + self.logger.info(u'{0} {1}', status, displayable_path(paths)) def log_choice(self, task, duplicate=False): """Logs the task's current choice if it should be logged. If @@ -242,17 +255,17 @@ class ImportSession(object): if duplicate: # Duplicate: log all three choices (skip, keep both, and trump). if task.should_remove_duplicates: - self.tag_log('duplicate-replace', paths) + self.tag_log(u'duplicate-replace', paths) elif task.choice_flag in (action.ASIS, action.APPLY): - self.tag_log('duplicate-keep', paths) + self.tag_log(u'duplicate-keep', paths) elif task.choice_flag is (action.SKIP): - self.tag_log('duplicate-skip', paths) + self.tag_log(u'duplicate-skip', paths) else: # Non-duplicate: log "skip" and "asis" choices. 
if task.choice_flag is action.ASIS: - self.tag_log('asis', paths) + self.tag_log(u'asis', paths) elif task.choice_flag is action.SKIP: - self.tag_log('skip', paths) + self.tag_log(u'skip', paths) def should_resume(self, path): raise NotImplementedError @@ -269,6 +282,7 @@ class ImportSession(object): def run(self): """Run the import task. """ + self.logger.info(u'import started {0}', time.asctime()) self.set_config(config['import']) # Set up the pipeline. @@ -277,25 +291,31 @@ class ImportSession(object): else: stages = [query_tasks(self)] + # In pretend mode, just log what would otherwise be imported. if self.config['pretend']: - # Only log the imported files and end the pipeline stages += [log_files(self)] else: if self.config['group_albums'] and \ not self.config['singletons']: - # Split directory tasks into one task for each album + # Split directory tasks into one task for each album. stages += [group_albums(self)] + + # These stages either talk to the user to get a decision or, + # in the case of a non-autotagged import, just choose to + # import everything as-is. In *both* cases, these stages + # also add the music to the library database, so later + # stages need to read and write data from there. if self.config['autotag']: - # FIXME We should also resolve duplicates when not - # autotagging. This is currently handled in `user_query` stages += [lookup_candidates(self), user_query(self)] else: stages += [import_asis(self)] - stages += [apply_choices(self)] + # Plugin stages. for stage_func in plugins.import_stages(): stages.append(plugin_stage(self, stage_func)) + stages += [manipulate_files(self)] + pl = pipeline.Pipeline(stages) # Run the pipeline. @@ -316,7 +336,7 @@ class ImportSession(object): been imported in a previous session. 
""" if self.is_resuming(toppath) \ - and all(map(lambda p: progress_element(toppath, p), paths)): + and all([progress_element(toppath, p) for p in paths]): return True if self.config['incremental'] \ and tuple(paths) in self.history_dirs: @@ -347,8 +367,8 @@ class ImportSession(object): # Either accept immediately or prompt for input to decide. if self.want_resume is True or \ self.should_resume(toppath): - log.warn(u'Resuming interrupted import of {0}'.format( - util.displayable_path(toppath))) + log.warning(u'Resuming interrupted import of {0}', + util.displayable_path(toppath)) self._is_resuming[toppath] = True else: # Clear progress; we're starting from the top. @@ -357,7 +377,34 @@ class ImportSession(object): # The importer task class. -class ImportTask(object): +class BaseImportTask(object): + """An abstract base class for importer tasks. + + Tasks flow through the importer pipeline. Each stage can update + them. """ + def __init__(self, toppath, paths, items): + """Create a task. The primary fields that define a task are: + + * `toppath`: The user-specified base directory that contains the + music for this task. If the task has *no* user-specified base + (for example, when importing based on an -L query), this can + be None. This is used for tracking progress and history. + * `paths`: A list of *specific* paths where the music for this task + came from. These paths can be directories, when their entire + contents are being imported, or files, when the task comprises + individual tracks. This is used for progress/history tracking and + for displaying the task to the user. + * `items`: A list of `Item` objects representing the music being + imported. + + These fields should not change after initialization. + """ + self.toppath = toppath + self.paths = paths + self.items = items + + +class ImportTask(BaseImportTask): """Represents a single set of items to be imported along with its intermediate state. May represent an album or a single item. 
@@ -385,28 +432,25 @@ class ImportTask(object): * `finalize()` Update the import progress and cleanup the file system. """ - def __init__(self, toppath=None, paths=None, items=None): - self.toppath = toppath - self.paths = paths - self.items = items + def __init__(self, toppath, paths, items): + super(ImportTask, self).__init__(toppath, paths, items) self.choice_flag = None - self.cur_album = None self.cur_artist = None self.candidates = [] self.rec = None - # TODO remove this eventually self.should_remove_duplicates = False self.is_album = True + self.search_ids = [] # user-supplied candidate IDs. def set_choice(self, choice): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. """ # Not part of the task structure: - assert choice not in (action.MANUAL, action.MANUAL_ID) assert choice != action.APPLY # Only used internally. - if choice in (action.SKIP, action.ASIS, action.TRACKS, action.ALBUMS): + if choice in (action.SKIP, action.ASIS, action.TRACKS, action.ALBUMS, + action.RETAG): self.choice_flag = choice self.match = None else: @@ -442,10 +486,10 @@ class ImportTask(object): """Returns identifying metadata about the current choice. For albums, this is an (artist, album) pair. For items, this is (artist, title). May only be called when the choice flag is ASIS - (in which case the data comes from the files' current metadata) - or APPLY (data comes from the choice). + or RETAG (in which case the data comes from the files' current + metadata) or APPLY (data comes from the choice). """ - if self.choice_flag is action.ASIS: + if self.choice_flag in (action.ASIS, action.RETAG): return (self.cur_artist, self.cur_album) elif self.choice_flag is action.APPLY: return (self.match.info.artist, self.match.info.album) @@ -456,21 +500,16 @@ class ImportTask(object): If the tasks applies an album match the method only returns the matched items. 
""" - if self.choice_flag == action.ASIS: + if self.choice_flag in (action.ASIS, action.RETAG): return list(self.items) - # FIXME this should be a simple attribute. There should be no - # need to retrieve the keys of `match.mapping`. This requires - # that we remove unmatched items from the list. elif self.choice_flag == action.APPLY: - return self.match.mapping.keys() + return list(self.match.mapping.keys()) else: assert False def apply_metadata(self): """Copy metadata from match info to the items. """ - # TODO call should be more descriptive like - # apply_metadata(self.match, self.items) autotag.apply_metadata(self.match.info, self.match.mapping) def duplicate_items(self, lib): @@ -481,13 +520,12 @@ class ImportTask(object): def remove_duplicates(self, lib): duplicate_items = self.duplicate_items(lib) - log.debug(u'removing {0} old duplicated items' - .format(len(duplicate_items))) + log.debug(u'removing {0} old duplicated items', len(duplicate_items)) for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug(u'deleting duplicate {0}' - .format(util.displayable_path(item.path))) + log.debug(u'deleting duplicate {0}', + util.displayable_path(item.path)) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) @@ -495,9 +533,6 @@ class ImportTask(object): def finalize(self, session): """Save progress, clean up files, and emit plugin event. """ - # FIXME the session argument is unfortunate. It should be - # present as an attribute of the task. - # Update progress. if session.want_resume: self.save_progress() @@ -514,8 +549,6 @@ class ImportTask(object): def cleanup(self, copy=False, delete=False, move=False): """Remove and prune imported paths. """ - # FIXME Maybe the keywords should be task properties. - # Do not delete any files or prune directories when skipping. 
if self.skip: return @@ -537,21 +570,33 @@ class ImportTask(object): self.prune(old_path) def _emit_imported(self, lib): - # FIXME This shouldn't be here. Skipping should be handled in - # the stages. - if self.skip: - return plugins.send('album_imported', lib=lib, album=self.album) - def lookup_candidates(self): - """Retrieve and store candidates for this album. + def handle_created(self, session): + """Send the `import_task_created` event for this task. Return a list of + tasks that should continue through the pipeline. By default, this is a + list containing only the task itself, but plugins can replace the task + with new ones. """ - artist, album, candidates, recommendation = \ - autotag.tag_album(self.items) + tasks = plugins.send('import_task_created', session=session, task=self) + if not tasks: + tasks = [self] + else: + # The plugins gave us a list of lists of tasks. Flatten it. + tasks = [t for inner in tasks for t in inner] + return tasks + + def lookup_candidates(self): + """Retrieve and store candidates for this album. User-specified + candidate IDs are stored in self.search_ids: if present, the + initial lookup is restricted to only those IDs. + """ + artist, album, prop = \ + autotag.tag_album(self.items, search_ids=self.search_ids) self.cur_artist = artist self.cur_album = album - self.candidates = candidates - self.rec = recommendation + self.candidates = prop.candidates + self.rec = prop.recommendation def find_duplicates(self, lib): """Return a list of albums from `lib` with the same artist and @@ -579,7 +624,10 @@ class ImportTask(object): return duplicates def align_album_level_fields(self): - """Make the some album fields equal across `self.items` + """Make some album fields equal across `self.items`. For the + RETAG action, we assume that the responsible for returning it + (ie. a plugin) always ensures that the first item contains + valid data on the relevant fields. 
""" changes = {} @@ -596,10 +644,10 @@ class ImportTask(object): changes['comp'] = False else: # VA. - changes['albumartist'] = VARIOUS_ARTISTS + changes['albumartist'] = config['va_name'].as_str() changes['comp'] = True - elif self.choice_flag == action.APPLY: + elif self.choice_flag in (action.APPLY, action.RETAG): # Applying autotagged metadata. Just get AA from the first # item. if not self.items[0].albumartist: @@ -612,19 +660,19 @@ class ImportTask(object): item.update(changes) def manipulate_files(self, move=False, copy=False, write=False, - link=False, session=None): + link=False, hardlink=False, session=None): items = self.imported_items() # Save the original paths of all items for deletion and pruning # in the next step (finalization). self.old_paths = [item.path for item in items] for item in items: - if move or copy or link: + if move or copy or link or hardlink: # In copy and link modes, treat re-imports specially: # move in-library files. (Out-of-library files are # copied/moved as usual). old_path = item.path - if (copy or link) and self.replaced_items[item] and \ - session.lib.directory in util.ancestry(old_path): + if (copy or link or hardlink) and self.replaced_items[item] \ + and session.lib.directory in util.ancestry(old_path): item.move() # We moved the item, so remove the # now-nonexistent file from old_paths. @@ -632,9 +680,9 @@ class ImportTask(object): else: # A normal import. Just copy files and keep track of # old paths. 
- item.move(copy, link) + item.move(copy, link, hardlink) - if write and self.apply: + if write and (self.apply or self.choice_flag == action.RETAG): item.try_write() with session.lib.transaction(): @@ -683,15 +731,15 @@ class ImportTask(object): if replaced_album: self.album.added = replaced_album.added self.album.update(replaced_album._values_flex) + self.album.artpath = replaced_album.artpath self.album.store() log.debug( u'Reimported album: added {0}, flexible ' - u'attributes {1} from album {2} for {3}'.format( - self.album.added, - replaced_album._values_flex.keys(), - replaced_album.id, - displayable_path(self.album.path), - ) + u'attributes {1} from album {2} for {3}', + self.album.added, + replaced_album._values_flex.keys(), + replaced_album.id, + displayable_path(self.album.path) ) for item in self.imported_items(): @@ -701,20 +749,18 @@ class ImportTask(object): item.added = dup_item.added log.debug( u'Reimported item added {0} ' - u'from item {1} for {2}'.format( - item.added, - dup_item.id, - displayable_path(item.path), - ) + u'from item {1} for {2}', + item.added, + dup_item.id, + displayable_path(item.path) ) item.update(dup_item._values_flex) log.debug( u'Reimported item flexible attributes {0} ' - u'from item {1} for {2}'.format( - dup_item._values_flex.keys(), - dup_item.id, - displayable_path(item.path), - ) + u'from item {1} for {2}', + dup_item._values_flex.keys(), + dup_item.id, + displayable_path(item.path) ) item.store() @@ -724,13 +770,12 @@ class ImportTask(object): """ for item in self.imported_items(): for dup_item in self.replaced_items[item]: - log.debug(u'Replacing item {0}: {1}' - .format(dup_item.id, - displayable_path(item.path))) + log.debug(u'Replacing item {0}: {1}', + dup_item.id, displayable_path(item.path)) dup_item.remove() - log.debug(u'{0} of {1} items replaced' - .format(sum(bool(l) for l in self.replaced_items.values()), - len(self.imported_items()))) + log.debug(u'{0} of {1} items replaced', + sum(bool(l) for l in 
self.replaced_items.values()), + len(self.imported_items())) def choose_match(self, session): """Ask the session which match should apply and apply it. @@ -766,14 +811,14 @@ class SingletonImportTask(ImportTask): """ def __init__(self, toppath, item): - super(SingletonImportTask, self).__init__(toppath, [item.path]) + super(SingletonImportTask, self).__init__(toppath, [item.path], [item]) self.item = item self.is_album = False self.paths = [item.path] def chosen_ident(self): - assert self.choice_flag in (action.ASIS, action.APPLY) - if self.choice_flag is action.ASIS: + assert self.choice_flag in (action.ASIS, action.APPLY, action.RETAG) + if self.choice_flag in (action.ASIS, action.RETAG): return (self.item.artist, self.item.title) elif self.choice_flag is action.APPLY: return (self.match.info.artist, self.match.info.title) @@ -785,17 +830,13 @@ class SingletonImportTask(ImportTask): autotag.apply_item_metadata(self.item, self.match.info) def _emit_imported(self, lib): - # FIXME This shouldn't be here. Skipped tasks should be removed from - # the pipeline. - if self.skip: - return for item in self.imported_items(): plugins.send('item_imported', lib=lib, item=item) def lookup_candidates(self): - candidates, recommendation = autotag.tag_item(self.item) - self.candidates = candidates - self.rec = recommendation + prop = autotag.tag_item(self.item, search_ids=self.search_ids) + self.candidates = prop.candidates + self.rec = prop.recommendation def find_duplicates(self, lib): """Return a list of items from `lib` that have the same artist @@ -838,22 +879,20 @@ class SingletonImportTask(ImportTask): # FIXME The inheritance relationships are inverted. This is why there -# are so many methods which pass. We should introduce a new -# BaseImportTask class. +# are so many methods which pass. More responsibility should be delegated to +# the BaseImportTask class. 
class SentinelImportTask(ImportTask): - """This class marks the progress of an import and does not import - any items itself. + """A sentinel task marks the progress of an import and does not + import any items itself. - If only `toppath` is set the task indicats the end of a top-level - directory import. If the `paths` argument is givent, too, the task + If only `toppath` is set the task indicates the end of a top-level + directory import. If the `paths` argument is also given, the task indicates the progress in the `toppath` import. """ - def __init__(self, toppath=None, paths=None): - self.toppath = toppath - self.paths = paths + def __init__(self, toppath, paths): + super(SentinelImportTask, self).__init__(toppath, paths, ()) # TODO Remove the remaining attributes eventually - self.items = None self.should_remove_duplicates = False self.is_album = True self.choice_flag = None @@ -883,13 +922,20 @@ class SentinelImportTask(ImportTask): class ArchiveImportTask(SentinelImportTask): - """Additional methods for handling archives. + """An import task that represents the processing of an archive. - Use when `toppath` points to a `zip`, `tar`, or `rar` archive. + `toppath` must be a `zip`, `tar`, or `rar` archive. Archive tasks + serve two purposes: + - First, it will unarchive the files to a temporary directory and + return it. The client should read tasks from the resulting + directory and send them through the pipeline. + - Second, it will clean up the temporary directory when it proceeds + through the pipeline. The client should send the archive task + after sending the rest of the music tasks to make this work. 
""" def __init__(self, toppath): - super(ArchiveImportTask, self).__init__(toppath) + super(ArchiveImportTask, self).__init__(toppath, ()) self.extracted = False @classmethod @@ -901,7 +947,7 @@ class ArchiveImportTask(SentinelImportTask): return False for path_test, _ in cls.handlers(): - if path_test(path): + if path_test(util.py3_path(path)): return True return False @@ -933,6 +979,8 @@ class ArchiveImportTask(SentinelImportTask): """Removes the temporary directory the archive was extracted to. """ if self.extracted: + log.debug(u'Removing extracted directory: {0}', + displayable_path(self.toppath)) shutil.rmtree(self.toppath) def extract(self): @@ -940,12 +988,12 @@ class ArchiveImportTask(SentinelImportTask): `toppath` to that directory. """ for path_test, handler_class in self.handlers(): - if path_test(self.toppath): + if path_test(util.py3_path(self.toppath)): break try: extract_to = mkdtemp() - archive = handler_class(self.toppath, mode='r') + archive = handler_class(util.py3_path(self.toppath), mode='r') archive.extractall(extract_to) finally: archive.close() @@ -954,56 +1002,103 @@ class ArchiveImportTask(SentinelImportTask): class ImportTaskFactory(object): - """Create album and singleton import tasks for all media files in a - directory or path. - - Depending on the session's 'flat' and 'singleton' configuration, it - groups all media files contained in `toppath` into singleton or - album import tasks. + """Generate album and singleton import tasks for all media files + indicated by a path. """ def __init__(self, toppath, session): + """Create a new task factory. + + `toppath` is the user-specified path to search for music to + import. `session` is the `ImportSession`, which controls how + tasks are read from the directory. + """ self.toppath = toppath self.session = session - self.skipped = 0 + self.skipped = 0 # Skipped due to incremental/resume. + self.imported = 0 # "Real" tasks created. 
+ self.is_archive = ArchiveImportTask.is_archive(syspath(toppath)) def tasks(self): - """Yield all import tasks for `self.toppath`. + """Yield all import tasks for music found in the user-specified + path `self.toppath`. Any necessary sentinel tasks are also + produced. - The behavior is configured by the session's 'flat', and - 'singleton' flags. + During generation, update `self.skipped` and `self.imported` + with the number of tasks that were not produced (due to + incremental mode or resumed imports) and the number of concrete + tasks actually produced, respectively. + + If `self.toppath` is an archive, it is adjusted to point to the + extracted data. """ + # Check whether this is an archive. + if self.is_archive: + archive_task = self.unarchive() + if not archive_task: + return + + # Search for music in the directory. for dirs, paths in self.paths(): if self.session.config['singletons']: for path in paths: - task = self.singleton(path) - if task: + tasks = self._create(self.singleton(path)) + for task in tasks: yield task yield self.sentinel(dirs) else: - task = self.album(paths, dirs) - if task: + tasks = self._create(self.album(paths, dirs)) + for task in tasks: yield task + # Produce the final sentinel for this toppath to indicate that + # it is finished. This is usually just a SentinelImportTask, but + # for archive imports, send the archive task instead (to remove + # the extracted directory). + if self.is_archive: + yield archive_task + else: + yield self.sentinel() + + def _create(self, task): + """Handle a new task to be emitted by the factory. + + Emit the `import_task_created` event and increment the + `imported` count if the task is not skipped. Return the same + task. If `task` is None, do nothing. + """ + if task: + tasks = task.handle_created(self.session) + self.imported += len(tasks) + return tasks + return [] + def paths(self): - """Walk `self.toppath` and yield pairs of directory lists and - path lists. 
+ """Walk `self.toppath` and yield `(dirs, files)` pairs where + `files` are individual music files and `dirs` the set of + containing directories where the music was found. + + This can either be a recursive search in the ordinary case, a + single track when `toppath` is a file, a single directory in + `flat` mode. """ if not os.path.isdir(syspath(self.toppath)): - yield ([self.toppath], [self.toppath]) + yield [self.toppath], [self.toppath] elif self.session.config['flat']: paths = [] for dirs, paths_in_dir in albums_in_dir(self.toppath): paths += paths_in_dir - yield ([self.toppath], paths) + yield [self.toppath], paths else: for dirs, paths in albums_in_dir(self.toppath): - yield (dirs, paths) + yield dirs, paths def singleton(self, path): + """Return a `SingletonImportTask` for the music file. + """ if self.session.already_imported(self.toppath, [path]): - log.debug(u'Skipping previously-imported path: {0}' - .format(displayable_path(path))) + log.debug(u'Skipping previously-imported path: {0}', + displayable_path(path)) self.skipped += 1 return None @@ -1014,7 +1109,7 @@ class ImportTaskFactory(object): return None def album(self, paths, dirs=None): - """Return `ImportTask` with all media files from paths. + """Return a `ImportTask` with all media files from paths. `dirs` is a list of parent directories used to record already imported albums. @@ -1026,8 +1121,8 @@ class ImportTaskFactory(object): dirs = list(set(os.path.dirname(p) for p in paths)) if self.session.already_imported(self.toppath, dirs): - log.debug(u'Skipping previously-imported path: {0}' - .format(displayable_path(dirs))) + log.debug(u'Skipping previously-imported path: {0}', + displayable_path(dirs)) self.skipped += 1 return None @@ -1040,14 +1135,46 @@ class ImportTaskFactory(object): return None def sentinel(self, paths=None): + """Return a `SentinelImportTask` indicating the end of a + top-level directory import. 
+ """ return SentinelImportTask(self.toppath, paths) - def read_item(self, path): - """Return an item created from the path. + def unarchive(self): + """Extract the archive for this `toppath`. - If an item could not be read it returns None and logs an error. + Extract the archive to a new directory, adjust `toppath` to + point to the extracted directory, and return an + `ArchiveImportTask`. If extraction fails, return None. + """ + assert self.is_archive + + if not (self.session.config['move'] or + self.session.config['copy']): + log.warning(u"Archive importing requires either " + u"'copy' or 'move' to be enabled.") + return + + log.debug(u'Extracting archive: {0}', + displayable_path(self.toppath)) + archive_task = ArchiveImportTask(self.toppath) + try: + archive_task.extract() + except Exception as exc: + log.error(u'extraction failed: {0}', exc) + return + + # Now read albums from the extracted directory. + self.toppath = archive_task.toppath + log.debug(u'Archive extracted to: {0}', self.toppath) + return archive_task + + def read_item(self, path): + """Return an `Item` read from the path. + + If an item cannot be read, return `None` instead and log an + error. """ - # TODO remove this method. Should be handled in ImportTask creation. try: return library.Item.from_path(path) except library.ReadError as exc: @@ -1055,14 +1182,10 @@ class ImportTaskFactory(object): # Silently ignore non-music files. pass elif isinstance(exc.reason, mediafile.UnreadableFileError): - log.warn(u'unreadable file: {0}'.format( - displayable_path(path)) - ) + log.warning(u'unreadable file: {0}', displayable_path(path)) else: - log.error(u'error reading {0}: {1}'.format( - displayable_path(path), - exc, - )) + log.error(u'error reading {0}: {1}', + displayable_path(path), exc) # Full-album pipeline stages. 
@@ -1074,50 +1197,22 @@ def read_tasks(session): """ skipped = 0 for toppath in session.paths: - # Determine if we want to resume import of the toppath + # Check whether we need to resume the import. session.ask_resume(toppath) - user_toppath = toppath - - # Extract archives. - archive_task = None - if ArchiveImportTask.is_archive(syspath(toppath)): - if not (session.config['move'] or session.config['copy']): - log.warn(u"Archive importing requires either " - "'copy' or 'move' to be enabled.") - continue - - log.debug(u'extracting archive {0}' - .format(displayable_path(toppath))) - archive_task = ArchiveImportTask(toppath) - try: - archive_task.extract() - except Exception as exc: - log.error(u'extraction failed: {0}'.format(exc)) - continue - - # Continue reading albums from the extracted directory. - toppath = archive_task.toppath + # Generate tasks. task_factory = ImportTaskFactory(toppath, session) - imported = False for t in task_factory.tasks(): - imported |= not t.skip yield t + skipped += task_factory.skipped - # Indicate the directory is finished. - # FIXME hack to delete extracted archives - if archive_task is None: - yield task_factory.sentinel() - else: - yield archive_task + if not task_factory.imported: + log.warning(u'No files imported from {0}', + displayable_path(toppath)) - if not imported: - log.warn(u'No files imported from {0}' - .format(displayable_path(user_toppath))) - - # Show skipped directories. + # Show skipped directories (due to incremental/resume). if skipped: - log.info(u'Skipped {0} directories.'.format(skipped)) + log.info(u'Skipped {0} paths.', skipped) def query_tasks(session): @@ -1128,13 +1223,15 @@ def query_tasks(session): if session.config['singletons']: # Search for items. for item in session.lib.items(session.query): - yield SingletonImportTask(None, item) + task = SingletonImportTask(None, item) + for task in task.handle_created(session): + yield task else: # Search for albums. 
for album in session.lib.albums(session.query): - log.debug(u'yielding album {0}: {1} - {2}' - .format(album.id, album.albumartist, album.album)) + log.debug(u'yielding album {0}: {1} - {2}', + album.id, album.albumartist, album.album) items = list(album.items()) # Clear IDs from re-tagged items so they appear "fresh" when @@ -1143,7 +1240,9 @@ def query_tasks(session): item.id = None item.album_id = None - yield ImportTask(None, [album.item_dir()], items) + task = ImportTask(None, [album.item_dir()], items) + for task in task.handle_created(session): + yield task @pipeline.mutator_stage @@ -1159,7 +1258,12 @@ def lookup_candidates(session, task): return plugins.send('import_task_start', session=session, task=task) - log.debug(u'Looking up: {0}'.format(displayable_path(task.paths))) + log.debug(u'Looking up: {0}', displayable_path(task.paths)) + + # Restrict the initial lookup to IDs specified by the user via the -m + # option. Currently all the IDs are passed onto the tasks directly. + task.search_ids = session.config['search_ids'].as_str_seq() + task.lookup_candidates() @@ -1170,7 +1274,7 @@ def user_query(session, task): The coroutine accepts an ImportTask objects. It uses the session's `choose_match` method to determine the `action` for - this task. Depending on the action additional stages are exectuted + this task. Depending on the action additional stages are executed and the processed task is yielded. It emits the ``import_task_choice`` event for plugins. Plugins have @@ -1189,7 +1293,9 @@ def user_query(session, task): # Set up a little pipeline for dealing with the singletons. 
def emitter(task): for item in task.items: - yield SingletonImportTask(task.toppath, item) + task = SingletonImportTask(task.toppath, item) + for new_task in task.handle_created(session): + yield new_task yield SentinelImportTask(task.toppath, task.paths) ipl = pipeline.Pipeline([ @@ -1210,6 +1316,7 @@ def user_query(session, task): return pipeline.multiple(ipl.pull()) resolve_duplicates(session, task) + apply_choice(session, task) return task @@ -1217,13 +1324,36 @@ def resolve_duplicates(session, task): """Check if a task conflicts with items or albums already imported and ask the session to resolve this. """ - if task.choice_flag in (action.ASIS, action.APPLY): - ident = task.chosen_ident() + if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) - if ident in session.seen_idents or found_duplicates: - session.resolve_duplicate(task, found_duplicates) + if found_duplicates: + log.debug(u'found duplicates: {}'.format( + [o.id for o in found_duplicates] + )) + + # Get the default action to follow from config. + duplicate_action = config['import']['duplicate_action'].as_choice({ + u'skip': u's', + u'keep': u'k', + u'remove': u'r', + u'ask': u'a', + }) + log.debug(u'default action for duplicates: {0}', duplicate_action) + + if duplicate_action == u's': + # Skip new. + task.set_choice(action.SKIP) + elif duplicate_action == u'k': + # Keep both. Do nothing; leave the choice intact. + pass + elif duplicate_action == u'r': + # Remove old. + task.should_remove_duplicates = True + else: + # No default action set; ask the session. 
+ session.resolve_duplicate(task, found_duplicates) + session.log_choice(task, True) - session.seen_idents.add(ident) @pipeline.mutator_stage @@ -1236,14 +1366,14 @@ def import_asis(session, task): if task.skip: return - log.info(displayable_path(task.paths)) + log.info(u'{}', displayable_path(task.paths)) task.set_choice(action.ASIS) + apply_choice(session, task) -@pipeline.mutator_stage -def apply_choices(session, task): - """A coroutine for applying changes to albums and singletons during - the autotag process. +def apply_choice(session, task): + """Apply the task's choice to the Album or Item it contains and add + it to the library. """ if task.skip: return @@ -1288,6 +1418,7 @@ def manipulate_files(session, task): copy=session.config['copy'], write=session.config['write'], link=session.config['link'], + hardlink=session.config['hardlink'], session=session, ) @@ -1297,20 +1428,22 @@ def manipulate_files(session, task): @pipeline.stage def log_files(session, task): - """A coroutine (pipeline stage) to log each file which will be imported + """A coroutine (pipeline stage) to log each file to be imported. """ if isinstance(task, SingletonImportTask): - log.info( - 'Singleton: {0}'.format(displayable_path(task.item['path']))) + log.info(u'Singleton: {0}', displayable_path(task.item['path'])) elif task.items: - log.info('Album {0}'.format(displayable_path(task.paths[0]))) + log.info(u'Album: {0}', displayable_path(task.paths[0])) for item in task.items: - log.info(' {0}'.format(displayable_path(item['path']))) + log.info(u' {0}', displayable_path(item['path'])) def group_albums(session): - """Group the items of a task by albumartist and album name and create a new - task for each album. Yield the tasks as a multi message. + """A pipeline stage that groups the items of each task into albums + using their metadata. + + Groups are identified using their artist and album fields. The + pipeline stage emits new album tasks for each discovered group. 
""" def group(item): return (item.albumartist or item.artist, item.album) @@ -1321,15 +1454,19 @@ def group_albums(session): if task.skip: continue tasks = [] - for _, items in itertools.groupby(task.items, group): - tasks.append(ImportTask(items=list(items))) + sorted_items = sorted(task.items, key=group) + for _, items in itertools.groupby(sorted_items, group): + items = list(items) + task = ImportTask(task.toppath, [i.path for i in items], + items) + tasks += task.handle_created(session) tasks.append(SentinelImportTask(task.toppath, task.paths)) task = pipeline.multiple(tasks) -MULTIDISC_MARKERS = (r'dis[ck]', r'cd') -MULTIDISC_PAT_FMT = r'^(.*%s[\W_]*)\d' +MULTIDISC_MARKERS = (br'dis[ck]', br'cd') +MULTIDISC_PAT_FMT = br'^(.*%s[\W_]*)\d' def albums_in_dir(path): @@ -1340,8 +1477,11 @@ def albums_in_dir(path): """ collapse_pat = collapse_paths = collapse_items = None ignore = config['ignore'].as_str_seq() + ignore_hidden = config['ignore_hidden'].get(bool) - for root, dirs, files in sorted_walk(path, ignore=ignore, logger=log): + for root, dirs, files in sorted_walk(path, ignore=ignore, + ignore_hidden=ignore_hidden, + logger=log): items = [os.path.join(root, f) for f in files] # If we're currently collapsing the constituent directories in a # multi-disc album, check whether we should continue collapsing @@ -1369,7 +1509,9 @@ def albums_in_dir(path): # named in this way. start_collapsing = False for marker in MULTIDISC_MARKERS: - marker_pat = re.compile(MULTIDISC_PAT_FMT % marker, re.I) + # We're using replace on %s due to lack of .format() on bytestrings + p = MULTIDISC_PAT_FMT.replace(b'%s', marker) + marker_pat = re.compile(p, re.I) match = marker_pat.match(os.path.basename(root)) # Is this directory the root of a nested multi-disc album? 
@@ -1378,13 +1520,16 @@ def albums_in_dir(path): start_collapsing = True subdir_pat = None for subdir in dirs: + subdir = util.bytestring_path(subdir) # The first directory dictates the pattern for # the remaining directories. if not subdir_pat: match = marker_pat.match(subdir) if match: + match_group = re.escape(match.group(1)) subdir_pat = re.compile( - r'^%s\d' % re.escape(match.group(1)), re.I + b''.join([b'^', match_group, br'\d']), + re.I ) else: start_collapsing = False @@ -1406,7 +1551,8 @@ def albums_in_dir(path): # Set the current pattern to match directories with the same # prefix as this one, followed by a digit. collapse_pat = re.compile( - r'^%s\d' % re.escape(match.group(1)), re.I + b''.join([b'^', re.escape(match.group(1)), br'\d']), + re.I ) break diff --git a/lib/beets/library.py b/lib/beets/library.py old mode 100644 new mode 100755 index 1de1bba5..b263ecd6 --- a/lib/beets/library.py +++ b/lib/beets/library.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,15 +15,17 @@ """The core data store and collection logic for beets. """ +from __future__ import division, absolute_import, print_function + import os import sys -import logging -import shlex import unicodedata import time import re -from unidecode import unidecode -from beets.mediafile import MediaFile, MutagenError, UnreadableFileError +import six + +from beets import logging +from beets.mediafile import MediaFile, UnreadableFileError from beets import plugins from beets import util from beets.util import bytestring_path, syspath, normpath, samefile @@ -31,6 +34,14 @@ from beets import dbcore from beets.dbcore import types import beets +# To use the SQLite "blob" type, it doesn't suffice to provide a byte +# string; SQLite treats that as encoded text. 
Wrapping it in a `buffer` or a +# `memoryview`, depending on the Python version, tells it that we +# actually mean non-text data. +if six.PY2: + BLOB_TYPE = buffer # noqa: F821 +else: + BLOB_TYPE = memoryview log = logging.getLogger('beets') @@ -38,48 +49,91 @@ log = logging.getLogger('beets') # Library-specific query types. class PathQuery(dbcore.FieldQuery): - """A query that matches all items under a given path.""" + """A query that matches all items under a given path. - escape_re = re.compile(r'[\\_%]') - escape_char = '\\' + Matching can either be case-insensitive or case-sensitive. By + default, the behavior depends on the OS: case-insensitive on Windows + and case-sensitive otherwise. + """ - def __init__(self, field, pattern, fast=True): + def __init__(self, field, pattern, fast=True, case_sensitive=None): + """Create a path query. `pattern` must be a path, either to a + file or a directory. + + `case_sensitive` can be a bool or `None`, indicating that the + behavior should depend on the filesystem. + """ super(PathQuery, self).__init__(field, pattern, fast) + # By default, the case sensitivity depends on the filesystem + # that the query path is located on. + if case_sensitive is None: + path = util.bytestring_path(util.normpath(pattern)) + case_sensitive = beets.util.case_sensitive(path) + self.case_sensitive = case_sensitive + + # Use a normalized-case pattern for case-insensitive matches. + if not case_sensitive: + pattern = pattern.lower() + # Match the path as a single file. self.file_path = util.bytestring_path(util.normpath(pattern)) # As a directory (prefix). - self.dir_path = util.bytestring_path(os.path.join(self.file_path, '')) + self.dir_path = util.bytestring_path(os.path.join(self.file_path, b'')) + + @classmethod + def is_path_query(cls, query_part): + """Try to guess whether a unicode query part is a path query. + + Condition: separator precedes colon and the file exists. 
+ """ + colon = query_part.find(':') + if colon != -1: + query_part = query_part[:colon] + + # Test both `sep` and `altsep` (i.e., both slash and backslash on + # Windows). + return ( + (os.sep in query_part or + (os.altsep and os.altsep in query_part)) and + os.path.exists(syspath(normpath(query_part))) + ) def match(self, item): - return (item.path == self.file_path) or \ - item.path.startswith(self.dir_path) + path = item.path if self.case_sensitive else item.path.lower() + return (path == self.file_path) or path.startswith(self.dir_path) - def clause(self): - escape = lambda m: self.escape_char + m.group(0) - dir_pattern = self.escape_re.sub(escape, self.dir_path) - dir_pattern = buffer(dir_pattern + '%') - file_blob = buffer(self.file_path) - return '({0} = ?) || ({0} LIKE ? ESCAPE ?)'.format(self.field), \ - (file_blob, dir_pattern, self.escape_char) + def col_clause(self): + file_blob = BLOB_TYPE(self.file_path) + dir_blob = BLOB_TYPE(self.dir_path) + + if self.case_sensitive: + query_part = '({0} = ?) || (substr({0}, 1, ?) = ?)' + else: + query_part = '(BYTELOWER({0}) = BYTELOWER(?)) || \ + (substr(BYTELOWER({0}), 1, ?) = BYTELOWER(?))' + + return query_part.format(self.field), \ + (file_blob, len(dir_blob), dir_blob) # Library-specific field types. class DateType(types.Float): # TODO representation should be `datetime` object - # TODO distinguish beetween date and time types + # TODO distinguish between date and time types query = dbcore.query.DateQuery def format(self, value): - return time.strftime(beets.config['time_format'].get(unicode), + return time.strftime(beets.config['time_format'].as_str(), time.localtime(value or 0)) def parse(self, string): try: # Try a formatted date string. return time.mktime( - time.strptime(string, beets.config['time_format'].get(unicode)) + time.strptime(string, + beets.config['time_format'].as_str()) ) except ValueError: # Fall back to a plain timestamp number. 
@@ -101,12 +155,11 @@ class PathType(types.Type): return normpath(bytestring_path(string)) def normalize(self, value): - if isinstance(value, unicode): + if isinstance(value, six.text_type): # Paths stored internally as encoded bytes. return bytestring_path(value) - elif isinstance(value, buffer): - # SQLite must store bytestings as buffers to avoid decoding. + elif isinstance(value, BLOB_TYPE): # We unwrap buffers to bytes. return bytes(value) @@ -117,8 +170,8 @@ class PathType(types.Type): return self.normalize(sql_value) def to_sql(self, value): - if isinstance(value, str): - value = buffer(value) + if isinstance(value, bytes): + value = BLOB_TYPE(value) return value @@ -140,6 +193,7 @@ class MusicalKey(types.String): for flat, sharp in self.ENHARMONIC.items(): key = re.sub(flat, sharp, key) key = re.sub(r'[\W\s]+minor', 'm', key) + key = re.sub(r'[\W\s]+major', '', key) return key.capitalize() def normalize(self, key): @@ -149,31 +203,57 @@ class MusicalKey(types.String): return self.parse(key) +class DurationType(types.Float): + """Human-friendly (M:SS) representation of a time interval.""" + query = dbcore.query.DurationQuery + + def format(self, value): + if not beets.config['format_raw_length'].get(bool): + return beets.ui.human_seconds_short(value or 0.0) + else: + return value + + def parse(self, string): + try: + # Try to format back hh:ss to seconds. + return util.raw_seconds_short(string) + except ValueError: + # Fall back to a plain float. + try: + return float(string) + except ValueError: + return self.null + + # Library-specific sort types. class SmartArtistSort(dbcore.query.Sort): """Sort by artist (either album artist or track artist), prioritizing the sort field over the raw field. 
""" - def __init__(self, model_cls, ascending=True): + def __init__(self, model_cls, ascending=True, case_insensitive=True): self.album = model_cls is Album self.ascending = ascending + self.case_insensitive = case_insensitive def order_clause(self): order = "ASC" if self.ascending else "DESC" - if self.album: - field = 'albumartist' - else: - field = 'artist' + field = 'albumartist' if self.album else 'artist' + collate = 'COLLATE NOCASE' if self.case_insensitive else '' return ('(CASE {0}_sort WHEN NULL THEN {0} ' 'WHEN "" THEN {0} ' - 'ELSE {0}_sort END) {1}').format(field, order) + 'ELSE {0}_sort END) {1} {2}').format(field, collate, order) def sort(self, objs): if self.album: - key = lambda a: a.albumartist_sort or a.albumartist + field = lambda a: a.albumartist_sort or a.albumartist else: - key = lambda i: i.artist_sort or i.artist + field = lambda i: i.artist_sort or i.artist + + if self.case_insensitive: + key = lambda x: field(x).lower() + else: + key = field return sorted(objs, key=key, reverse=not self.ascending) @@ -182,7 +262,7 @@ PF_KEY_DEFAULT = 'default' # Exceptions. - +@six.python_2_unicode_compatible class FileOperationError(Exception): """Indicates an error when interacting with a file on disk. Possibilities include an unsupported media type, a permissions @@ -196,55 +276,75 @@ class FileOperationError(Exception): self.path = path self.reason = reason - def __unicode__(self): + def text(self): """Get a string representing the error. Describes both the underlying reason and the file path in question. """ return u'{0}: {1}'.format( util.displayable_path(self.path), - unicode(self.reason) + six.text_type(self.reason) ) - def __str__(self): - return unicode(self).encode('utf8') + # define __str__ as text to avoid infinite loop on super() calls + # with @six.python_2_unicode_compatible + __str__ = text +@six.python_2_unicode_compatible class ReadError(FileOperationError): """An error while reading a file (i.e. in `Item.read`). 
""" - def __unicode__(self): - return u'error reading ' + super(ReadError, self).__unicode__() + def __str__(self): + return u'error reading ' + super(ReadError, self).text() +@six.python_2_unicode_compatible class WriteError(FileOperationError): """An error while writing a file (i.e. in `Item.write`). """ - def __unicode__(self): - return u'error writing ' + super(WriteError, self).__unicode__() + def __str__(self): + return u'error writing ' + super(WriteError, self).text() # Item and Album model classes. +@six.python_2_unicode_compatible class LibModel(dbcore.Model): """Shared concrete functionality for Items and Albums. """ + _format_config_key = None + """Config key that specifies how an instance should be formatted. + """ + def _template_funcs(self): funcs = DefaultTemplateFunctions(self, self._db).functions() funcs.update(plugins.template_funcs()) return funcs - def store(self): - super(LibModel, self).store() - plugins.send('database_change', lib=self._db) + def store(self, fields=None): + super(LibModel, self).store(fields) + plugins.send('database_change', lib=self._db, model=self) def remove(self): super(LibModel, self).remove() - plugins.send('database_change', lib=self._db) + plugins.send('database_change', lib=self._db, model=self) def add(self, lib=None): super(LibModel, self).add(lib) - plugins.send('database_change', lib=self._db) + plugins.send('database_change', lib=self._db, model=self) + + def __format__(self, spec): + if not spec: + spec = beets.config[self._format_config_key].as_str() + assert isinstance(spec, six.text_type) + return self.evaluate_template(spec) + + def __str__(self): + return format(self) + + def __bytes__(self): + return self.__str__().encode('utf-8') class FormattedItemMapping(dbcore.db.FormattedMapping): @@ -315,7 +415,9 @@ class Item(LibModel): 'albumartist_sort': types.STRING, 'albumartist_credit': types.STRING, 'genre': types.STRING, + 'lyricist': types.STRING, 'composer': types.STRING, + 'arranger': types.STRING, 
'grouping': types.STRING, 'year': types.PaddedInt(4), 'month': types.PaddedInt(2), @@ -356,7 +458,7 @@ class Item(LibModel): 'original_day': types.PaddedInt(2), 'initial_key': MusicalKey(), - 'length': types.FLOAT, + 'length': DurationType(), 'bitrate': types.ScaledInt(1000, u'kbps'), 'format': types.STRING, 'samplerate': types.ScaledInt(1000, u'kHz'), @@ -369,6 +471,10 @@ class Item(LibModel): _search_fields = ('artist', 'title', 'comments', 'album', 'albumartist', 'genre') + _types = { + 'data_source': types.STRING, + } + _media_fields = set(MediaFile.readable_fields()) \ .intersection(_fields.keys()) """Set of item fields that are backed by `MediaFile` fields. @@ -378,14 +484,25 @@ class Item(LibModel): `write`. """ + _media_tag_fields = set(MediaFile.fields()).intersection(_fields.keys()) + """Set of item fields that are backed by *writable* `MediaFile` tag + fields. + + This excludes fields that represent audio data, such as `bitrate` or + `length`. + """ + _formatter = FormattedItemMapping _sorts = {'artist': SmartArtistSort} + _format_config_key = 'format_item' + @classmethod def _getters(cls): getters = plugins.item_field_getters() getters['singleton'] = lambda i: i.album_id is None + getters['filesize'] = Item.try_filesize # In bytes. return getters @classmethod @@ -403,10 +520,10 @@ class Item(LibModel): """ # Encode unicode paths and read buffers. if key == 'path': - if isinstance(value, unicode): + if isinstance(value, six.text_type): value = bytestring_path(value) - elif isinstance(value, buffer): - value = str(value) + elif isinstance(value, BLOB_TYPE): + value = bytes(value) if key in MediaFile.fields(): self.mtime = 0 # Reset mtime on dirty. 
@@ -447,16 +564,13 @@ class Item(LibModel): read_path = normpath(read_path) try: mediafile = MediaFile(syspath(read_path)) - except (OSError, IOError, UnreadableFileError) as exc: + except UnreadableFileError as exc: raise ReadError(read_path, exc) for key in self._media_fields: value = getattr(mediafile, key) - if isinstance(value, (int, long)): - # Filter values wider than 64 bits (in signed representation). - # SQLite cannot store them. py26: Post transition, we can use: - # value.bit_length() > 63 - if abs(value) >= 2 ** 63: + if isinstance(value, six.integer_types): + if value.bit_length() > 63: value = 0 self[key] = value @@ -466,12 +580,18 @@ class Item(LibModel): self.path = read_path - def write(self, path=None): + def write(self, path=None, tags=None): """Write the item's metadata to a media file. All fields in `_media_fields` are written to disk according to the values on this object. + `path` is the path of the mediafile to write the data to. It + defaults to the item's path. + + `tags` is a dictionary of additional metadata that should be + written to the file. (These tags need not be in `_media_fields`.) + Can raise either a `ReadError` or a `WriteError`. """ if path is None: @@ -479,19 +599,26 @@ class Item(LibModel): else: path = normpath(path) - tags = dict(self) - plugins.send('write', item=self, path=path, tags=tags) + # Get the data to write to the file. + item_tags = dict(self) + item_tags = {k: v for k, v in item_tags.items() + if k in self._media_fields} # Only write media fields. + if tags is not None: + item_tags.update(tags) + plugins.send('write', item=self, path=path, tags=item_tags) + # Open the file. try: mediafile = MediaFile(syspath(path), id3v23=beets.config['id3v23'].get(bool)) - except (OSError, IOError, UnreadableFileError) as exc: + except UnreadableFileError as exc: raise ReadError(self.path, exc) - mediafile.update(tags) + # Write the tags to the file.
+ mediafile.update(item_tags) try: mediafile.save() - except (OSError, IOError, MutagenError) as exc: + except UnreadableFileError as exc: raise WriteError(self.path, exc) # The file has a new mtime. @@ -499,38 +626,44 @@ class Item(LibModel): self.mtime = self.current_mtime() plugins.send('after_write', item=self, path=path) - def try_write(self, path=None): + def try_write(self, path=None, tags=None): """Calls `write()` but catches and logs `FileOperationError` exceptions. Returns `False` an exception was caught and `True` otherwise. """ try: - self.write(path) + self.write(path, tags) return True except FileOperationError as exc: - log.error(exc) + log.error(u"{0}", exc) return False - def try_sync(self, write=None): - """Synchronize the item with the database and the media file - tags, updating them with this object's current state. + def try_sync(self, write, move, with_album=True): + """Synchronize the item with the database and, possibly, updates its + tags on disk and its path (by moving the file). - By default, the current `path` for the item is used to write - tags. If `write` is `False`, no tags are written. If `write` is - a path, tags are written to that file instead. + `write` indicates whether to write new tags into the file. Similarly, + `move` controls whether the path should be updated. In the + latter case, files are *only* moved when they are inside their + library's directory (if any). - Similar to calling :meth:`write` and :meth:`store`. + Similar to calling :meth:`write`, :meth:`move`, and :meth:`store` + (conditionally). """ - if write is True: - write = None - if write is not False: - self.try_write(path=write) + if write: + self.try_write() + if move: + # Check whether this file is inside the library directory. + if self._db and self._db.directory in util.ancestry(self.path): + log.debug(u'moving {0} to synchronize path', + util.displayable_path(self.path)) + self.move(with_album=with_album) self.store() # Files themselves. 
- def move_file(self, dest, copy=False, link=False): + def move_file(self, dest, copy=False, link=False, hardlink=False): """Moves or copies the item's file, updating the path value if the move succeeds. If a file exists at ``dest``, then it is slightly modified to be unique. @@ -545,6 +678,10 @@ class Item(LibModel): util.link(self.path, dest) plugins.send("item_linked", item=self, source=self.path, destination=dest) + elif hardlink: + util.hardlink(self.path, dest) + plugins.send("item_hardlinked", item=self, source=self.path, + destination=dest) else: plugins.send("before_item_moved", item=self, source=self.path, destination=dest) @@ -561,6 +698,17 @@ class Item(LibModel): """ return int(os.path.getmtime(syspath(self.path))) + def try_filesize(self): + """Get the size of the underlying file in bytes. + + If the file is missing, return 0 (and log a warning). + """ + try: + return os.path.getsize(syspath(self.path)) + except (OSError, Exception) as exc: + log.warning(u'could not get filesize: {0}', exc) + return 0 + # Model methods. def remove(self, delete=False, with_album=True): @@ -586,14 +734,16 @@ class Item(LibModel): self._db._memotable = {} - def move(self, copy=False, link=False, basedir=None, with_album=True): + def move(self, copy=False, link=False, hardlink=False, basedir=None, + with_album=True, store=True): """Move the item to its designated location within the library directory (provided by destination()). Subdirectories are created as needed. If the operation succeeds, the item's path field is updated to reflect the new location. If `copy` is true, moving the file is copied rather than moved. - Similarly, `link` creates a symlink instead. + Similarly, `link` creates a symlink instead, and `hardlink` + creates a hardlink. basedir overrides the library base directory for the destination. @@ -602,10 +752,11 @@ class Item(LibModel): move its art. (This can be disabled by passing with_album=False.) 
- The item is stored to the database if it is in the database, so - any dirty fields prior to the move() call will be written as a - side effect. You probably want to call save() to commit the DB - transaction. + By default, the item is stored to the database if it is in the + database, so any dirty fields prior to the move() call will be written + as a side effect. You probably want to call save() to commit the DB + transaction. If `store` is false however, the item won't be stored, and + you'll have to manually store it after invoking this method. """ self._check_db() dest = self.destination(basedir=basedir) @@ -615,15 +766,17 @@ class Item(LibModel): # Perform the move and store the change. old_path = self.path - self.move_file(dest, copy, link) - self.store() + self.move_file(dest, copy, link, hardlink) + if store: + self.store() # If this item is in an album, move its art. if with_album: album = self.get_album() if album: album.move_art(copy) - album.store() + if store: + album.store() # Prune vacated directory. if not copy: @@ -661,7 +814,7 @@ class Item(LibModel): if query == PF_KEY_DEFAULT: break else: - assert False, "no default path format" + assert False, u"no default path format" if isinstance(path_format, Template): subpath_tmpl = path_format else: @@ -677,31 +830,32 @@ class Item(LibModel): subpath = unicodedata.normalize('NFC', subpath) if beets.config['asciify_paths']: - subpath = unidecode(subpath) + subpath = util.asciify_path( + subpath, + beets.config['path_sep_replace'].as_str() + ) - # Truncate components and remove forbidden characters. - subpath = util.sanitize_path(subpath, self._db.replacements) - - # Encode for the filesystem. - if not fragment: - subpath = bytestring_path(subpath) - - # Preserve extension. - _, extension = os.path.splitext(self.path) - if fragment: - # Outputting Unicode. - extension = extension.decode('utf8', 'ignore') - subpath += extension.lower() - - # Truncate too-long components.
maxlen = beets.config['max_filename_length'].get(int) if not maxlen: # When zero, try to determine from filesystem. maxlen = util.max_filename_length(self._db.directory) - subpath = util.truncate_path(subpath, maxlen) + + subpath, fellback = util.legalize_path( + subpath, self._db.replacements, maxlen, + os.path.splitext(self.path)[1], fragment + ) + if fellback: + # Print an error message if legalization fell back to + # default replacements because of the maximum length. + log.warning( + u'Fell back to default replacements when naming ' + u'file {}. Configure replacements to avoid lengthening ' + u'the filename.', + subpath + ) if fragment: - return subpath + return util.as_string(subpath) else: return normpath(os.path.join(basedir, subpath)) @@ -727,7 +881,6 @@ class Album(LibModel): 'year': types.PaddedInt(4), 'month': types.PaddedInt(2), 'day': types.PaddedInt(2), - 'tracktotal': types.PaddedInt(2), 'disctotal': types.PaddedInt(2), 'comp': types.BOOLEAN, 'mb_albumid': types.STRING, @@ -751,6 +904,11 @@ class Album(LibModel): _search_fields = ('album', 'albumartist', 'genre') + _types = { + 'path': PathType(), + 'data_source': types.STRING, + } + _sorts = { 'albumartist': SmartArtistSort, 'artist': SmartArtistSort, @@ -766,7 +924,6 @@ class Album(LibModel): 'year', 'month', 'day', - 'tracktotal', 'disctotal', 'comp', 'mb_albumid', @@ -790,12 +947,15 @@ class Album(LibModel): """List of keys that are set on an album's items. """ + _format_config_key = 'format_album' + @classmethod def _getters(cls): # In addition to plugin-provided computed fields, also expose # the album's directory as `path`. 
getters = plugins.album_field_getters() getters['path'] = Album.item_dir + getters['albumtotal'] = Album._albumtotal return getters def items(self): @@ -824,7 +984,7 @@ class Album(LibModel): for item in self.items(): item.remove(delete, False) - def move_art(self, copy=False, link=False): + def move_art(self, copy=False, link=False, hardlink=False): """Move or copy any existing album art so that it remains in the same directory as the items. """ @@ -837,13 +997,15 @@ class Album(LibModel): return new_art = util.unique_path(new_art) - log.debug(u'moving album art {0} to {1}' - .format(util.displayable_path(old_art), - util.displayable_path(new_art))) + log.debug(u'moving album art {0} to {1}', + util.displayable_path(old_art), + util.displayable_path(new_art)) if copy: util.copy(old_art, new_art) elif link: util.link(old_art, new_art) + elif hardlink: + util.hardlink(old_art, new_art) else: util.move(old_art, new_art) self.artpath = new_art @@ -853,26 +1015,32 @@ class Album(LibModel): util.prune_dirs(os.path.dirname(old_art), self._db.directory) - def move(self, copy=False, link=False, basedir=None): + def move(self, copy=False, link=False, hardlink=False, basedir=None, + store=True): """Moves (or copies) all items to their destination. Any album art moves along with them. basedir overrides the library base - directory for the destination. The album is stored to the - database, persisting any modifications to its metadata. + directory for the destination. By default, the album is stored to the + database, persisting any modifications to its metadata. If `store` is + false however, the album is not stored automatically, and you'll have + to manually store it after invoking this method. """ basedir = basedir or self._db.directory # Ensure new metadata is available to items for destination # computation. - self.store() + if store: + self.store() # Move items.
items = list(self.items()) for item in items: - item.move(copy, link, basedir=basedir, with_album=False) + item.move(copy, link, hardlink, basedir=basedir, with_album=False, + store=store) # Move art. - self.move_art(copy, link) - self.store() + self.move_art(copy, link, hardlink) + if store: + self.store() def item_dir(self): """Returns the directory containing the album's first item, @@ -880,9 +1048,30 @@ class Album(LibModel): """ item = self.items().get() if not item: - raise ValueError('empty album') + raise ValueError(u'empty album') return os.path.dirname(item.path) + def _albumtotal(self): + """Return the total number of tracks on all discs on the album + """ + if self.disctotal == 1 or not beets.config['per_disc_numbering']: + return self.items()[0].tracktotal + + counted = [] + total = 0 + + for item in self.items(): + if item.disc in counted: + continue + + total += item.tracktotal + counted.append(item.disc) + + if len(counted) == self.disctotal: + break + + return total + def art_destination(self, image, item_dir=None): """Returns a path to the destination for the album art image for the album. `image` is the path of the image that will be @@ -895,10 +1084,14 @@ class Album(LibModel): image = bytestring_path(image) item_dir = item_dir or self.item_dir() - filename_tmpl = Template(beets.config['art_filename'].get(unicode)) + filename_tmpl = Template( + beets.config['art_filename'].as_str()) subpath = self.evaluate_template(filename_tmpl, True) if beets.config['asciify_paths']: - subpath = unidecode(subpath) + subpath = util.asciify_path( + subpath, + beets.config['path_sep_replace'].as_str() + ) subpath = util.sanitize_path(subpath, replacements=self._db.replacements) subpath = bytestring_path(subpath) @@ -912,6 +1105,8 @@ class Album(LibModel): """Sets the album's cover art to the image at the given path. The image is copied (or moved) into place, replacing any existing art. + + Sends an 'art_set' event with `self` as the sole argument. 
""" path = bytestring_path(path) oldart = self.artpath @@ -935,9 +1130,13 @@ class Album(LibModel): util.move(path, artdest) self.artpath = artdest - def store(self): + plugins.send('art_set', album=self) + + def store(self, fields=None): """Update the database with the album information. The album's tracks are also updated. + :param fields: The fields to be stored. If not specified, all fields + will be. """ # Get modified track fields. track_updates = {} @@ -946,22 +1145,25 @@ class Album(LibModel): track_updates[key] = self[key] with self._db.transaction(): - super(Album, self).store() + super(Album, self).store(fields) if track_updates: for item in self.items(): for key, value in track_updates.items(): item[key] = value item.store() - def try_sync(self, write=True): - """Synchronize the album and its items with the database and - their files by updating them with this object's current state. + def try_sync(self, write, move): + """Synchronize the album and its items with the database. + Optionally, also write any new tags into the files and update + their paths. - `write` indicates whether to write tags to the item files. + `write` indicates whether to write tags to the item files, and + `move` controls whether files (both audio and album art) are + moved. """ self.store() for item in self.items(): - item.try_sync(bool(write)) + item.try_sync(write, move) # Query construction helpers. @@ -979,26 +1181,24 @@ def parse_query_parts(parts, model_cls): # Special-case path-like queries, which are non-field queries # containing path separators (/). - if 'path' in model_cls._fields: - path_parts = [] - non_path_parts = [] - for s in parts: - if s.find(os.sep, 0, s.find(':')) != -1: - # Separator precedes colon. 
- path_parts.append(s) - else: - non_path_parts.append(s) - else: - path_parts = () - non_path_parts = parts + path_parts = [] + non_path_parts = [] + for s in parts: + if PathQuery.is_path_query(s): + path_parts.append(s) + else: + non_path_parts.append(s) query, sort = dbcore.parse_sorted_query( model_cls, non_path_parts, prefixes ) # Add path queries to aggregate query. - if path_parts: - query.subqueries += [PathQuery('path', s) for s in path_parts] + # Match field / flexattr depending on whether the model has the path field + fast_path_query = 'path' in model_cls._fields + query.subqueries += [PathQuery('path', s, fast_path_query) + for s in path_parts] + return query, sort @@ -1008,15 +1208,28 @@ def parse_query_string(s, model_cls): The string is split into components using shell-like syntax. """ - # A bug in Python < 2.7.3 prevents correct shlex splitting of - # Unicode strings. - # http://bugs.python.org/issue6988 - if isinstance(s, unicode): - s = s.encode('utf8') - parts = [p.decode('utf8') for p in shlex.split(s)] + message = u"Query is not unicode: {0!r}".format(s) + assert isinstance(s, six.text_type), message + try: + parts = util.shlex_split(s) + except ValueError as exc: + raise dbcore.InvalidQueryError(s, exc) return parse_query_parts(parts, model_cls) +def _sqlite_bytelower(bytestring): + """ A custom ``bytelower`` sqlite function so we can compare + bytestrings in a semi case insensitive fashion. This is to work + around sqlite builds that are compiled with + ``-DSQLITE_LIKE_DOESNT_MATCH_BLOBS``. See + ``https://github.com/beetbox/beets/issues/2172`` for details. + """ + if not six.PY2: + return bytestring.lower() + + return buffer(bytes(bytestring).lower()) # noqa: F821 + + # The Library: interface to the database.
class Library(dbcore.Database): @@ -1029,9 +1242,8 @@ class Library(dbcore.Database): path_formats=((PF_KEY_DEFAULT, '$artist/$album/$track $title'),), replacements=None): - if path != ':memory:': - self.path = bytestring_path(normpath(path)) - super(Library, self).__init__(path) + timeout = beets.config['timeout'].as_number() + super(Library, self).__init__(path, timeout=timeout) self.directory = bytestring_path(normpath(directory)) self.path_formats = path_formats @@ -1039,6 +1251,11 @@ class Library(dbcore.Database): self._memotable = {} # Used for template substitution performance. + def _create_connection(self): + conn = super(Library, self)._create_connection() + conn.create_function('bytelower', 1, _sqlite_bytelower) + return conn + # Adding objects to the database. def add(self, obj): @@ -1083,11 +1300,14 @@ class Library(dbcore.Database): in the query string the `sort` argument is ignored. """ # Parse the query, if necessary. - parsed_sort = None - if isinstance(query, basestring): - query, parsed_sort = parse_query_string(query, model_cls) - elif isinstance(query, (list, tuple)): - query, parsed_sort = parse_query_parts(query, model_cls) + try: + parsed_sort = None + if isinstance(query, six.string_types): + query, parsed_sort = parse_query_string(query, model_cls) + elif isinstance(query, (list, tuple)): + query, parsed_sort = parse_query_parts(query, model_cls) + except dbcore.query.InvalidQueryArgumentTypeError as exc: + raise dbcore.InvalidQueryError(query, exc) # Any non-null sort specified by the parsed query overrides the # provided sort. @@ -1098,21 +1318,29 @@ class Library(dbcore.Database): model_cls, query, sort ) + @staticmethod + def get_default_album_sort(): + """Get a :class:`Sort` object for albums from the config option. + """ + return dbcore.sort_from_strings( + Album, beets.config['sort_album'].as_str_seq()) + + @staticmethod + def get_default_item_sort(): + """Get a :class:`Sort` object for items from the config option. 
+ """ + return dbcore.sort_from_strings( + Item, beets.config['sort_item'].as_str_seq()) + def albums(self, query=None, sort=None): """Get :class:`Album` objects matching the query. """ - sort = sort or dbcore.sort_from_strings( - Album, beets.config['sort_album'].as_str_seq() - ) - return self._fetch(Album, query, sort) + return self._fetch(Album, query, sort or self.get_default_album_sort()) def items(self, query=None, sort=None): """Get :class:`Item` objects matching the query. """ - sort = sort or dbcore.sort_from_strings( - Item, beets.config['sort_item'].as_str_seq() - ) - return self._fetch(Item, query, sort) + return self._fetch(Item, query, sort or self.get_default_item_sort()) # Convenience accessors. @@ -1154,7 +1382,7 @@ class DefaultTemplateFunctions(object): _prefix = 'tmpl_' def __init__(self, item=None, lib=None): - """Paramaterize the functions. If `item` or `lib` is None, then + """Parametrize the functions. If `item` or `lib` is None, then some functions (namely, ``aunique``) will always evaluate to the empty string. """ @@ -1218,22 +1446,24 @@ class DefaultTemplateFunctions(object): def tmpl_asciify(s): """Translate non-ASCII characters to their ASCII equivalents. """ - return unidecode(s) + return util.asciify_path(s, beets.config['path_sep_replace'].as_str()) @staticmethod - def tmpl_time(s, format): + def tmpl_time(s, fmt): """Format a time value using `strftime`. """ - cur_fmt = beets.config['time_format'].get(unicode) - return time.strftime(format, time.strptime(s, cur_fmt)) + cur_fmt = beets.config['time_format'].as_str() + return time.strftime(fmt, time.strptime(s, cur_fmt)) - def tmpl_aunique(self, keys=None, disam=None): + def tmpl_aunique(self, keys=None, disam=None, bracket=None): """Generate a string that is guaranteed to be unique among all albums in the library who share the same set of keys. A fields from "disam" is used in the string if one is sufficient to disambiguate the albums. Otherwise, a fallback opaque value is used. 
Both "keys" and "disam" should be given as - whitespace-separated lists of field names. + whitespace-separated lists of field names, while "bracket" is a + pair of characters to be used as brackets surrounding the + disambiguator or empty to have no brackets. """ # Fast paths: no album, no item or library, or memoized value. if not self.item or not self.lib: @@ -1247,9 +1477,19 @@ class DefaultTemplateFunctions(object): keys = keys or 'albumartist album' disam = disam or 'albumtype year label catalognum albumdisambig' + if bracket is None: + bracket = '[]' keys = keys.split() disam = disam.split() + # Assign a left and right bracket or leave blank if argument is empty. + if len(bracket) == 2: + bracket_l = bracket[0] + bracket_r = bracket[1] + else: + bracket_l = u'' + bracket_r = u'' + album = self.lib.get_album(self.item) if not album: # Do nothing for singletons. @@ -1259,7 +1499,7 @@ class DefaultTemplateFunctions(object): # Find matching albums to disambiguate with. subqueries = [] for key in keys: - value = getattr(album, key) + value = album.get(key, '') subqueries.append(dbcore.MatchQuery(key, value)) albums = self.lib.albums(dbcore.AndQuery(subqueries)) @@ -1272,7 +1512,7 @@ class DefaultTemplateFunctions(object): # Find the first disambiguator that distinguishes the albums. for disambiguator in disam: # Get the value for each album for the current field. - disam_values = set([getattr(a, disambiguator) for a in albums]) + disam_values = set([a.get(disambiguator, '') for a in albums]) # If the set of unique values is equal to the number of # albums in the disambiguation set, we're done -- this is @@ -1282,16 +1522,51 @@ class DefaultTemplateFunctions(object): else: # No disambiguator distinguished all fields. - res = u' {0}'.format(album.id) + res = u' {1}{0}{2}'.format(album.id, bracket_l, bracket_r) self.lib._memotable[memokey] = res return res # Flatten disambiguation value into a string. 
disam_value = album.formatted(True).get(disambiguator) - res = u' [{0}]'.format(disam_value) + + # Return empty string if disambiguator is empty. + if disam_value: + res = u' {1}{0}{2}'.format(disam_value, bracket_l, bracket_r) + else: + res = u'' + self.lib._memotable[memokey] = res return res + @staticmethod + def tmpl_first(s, count=1, skip=0, sep=u'; ', join_str=u'; '): + """ Gets the item(s) from x to y in a string separated by something + and join then with something + + :param s: the string + :param count: The number of items included + :param skip: The number of items skipped + :param sep: the separator. Usually is '; ' (default) or '/ ' + :param join_str: the string which will join the items, default '; '. + """ + skip = int(skip) + count = skip + int(count) + return join_str.join(s.split(sep)[skip:count]) + + def tmpl_ifdef(self, field, trueval=u'', falseval=u''): + """ If field exists return trueval or the field (default) + otherwise, emit return falseval (if provided). + + :param field: The name of the field + :param trueval: The string if the condition is true + :param falseval: The string if the condition is false + :return: The string, based on condition + """ + if self.item.formatted().get(field): + return trueval if trueval else self.item.formatted().get(field) + else: + return falseval + # Get the name of tmpl_* functions in the above class. DefaultTemplateFunctions._func_names = \ diff --git a/lib/beets/logging.py b/lib/beets/logging.py new file mode 100755 index 00000000..d5ec7b73 --- /dev/null +++ b/lib/beets/logging.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2016, Adrian Sampson. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""A drop-in replacement for the standard-library `logging` module that +allows {}-style log formatting on Python 2 and 3. + +Provides everything the "logging" module does. The only difference is +that when getLogger(name) instantiates a logger that logger uses +{}-style formatting. +""" + +from __future__ import division, absolute_import, print_function + +from copy import copy +from logging import * # noqa +import subprocess +import threading +import six + + +def logsafe(val): + """Coerce a potentially "problematic" value so it can be formatted + in a Unicode log string. + + This works around a number of pitfalls when logging objects in + Python 2: + - Logging path names, which must be byte strings, requires + conversion for output. + - Some objects, including some exceptions, will crash when you call + `unicode(v)` while `str(v)` works fine. CalledProcessError is an + example. + """ + # Already Unicode. + if isinstance(val, six.text_type): + return val + + # Bytestring: needs decoding. + elif isinstance(val, bytes): + # Blindly convert with UTF-8. Eventually, it would be nice to + # (a) only do this for paths, if they can be given a distinct + # type, and (b) warn the developer if they do this for other + # bytestrings. + return val.decode('utf-8', 'replace') + + # A "problem" object: needs a workaround. 
+ elif isinstance(val, subprocess.CalledProcessError): + try: + return six.text_type(val) + except UnicodeDecodeError: + # An object with a broken __unicode__ formatter. Use __str__ + # instead. + return str(val).decode('utf-8', 'replace') + + # Other objects are used as-is so field access, etc., still works in + # the format string. + else: + return val + + +class StrFormatLogger(Logger): + """A version of `Logger` that uses `str.format`-style formatting + instead of %-style formatting. + """ + + class _LogMessage(object): + def __init__(self, msg, args, kwargs): + self.msg = msg + self.args = args + self.kwargs = kwargs + + def __str__(self): + args = [logsafe(a) for a in self.args] + kwargs = dict((k, logsafe(v)) for (k, v) in self.kwargs.items()) + return self.msg.format(*args, **kwargs) + + def _log(self, level, msg, args, exc_info=None, extra=None, **kwargs): + """Log msg.format(*args, **kwargs)""" + m = self._LogMessage(msg, args, kwargs) + return super(StrFormatLogger, self)._log(level, m, (), exc_info, extra) + + +class ThreadLocalLevelLogger(Logger): + """A version of `Logger` whose level is thread-local instead of shared. + """ + def __init__(self, name, level=NOTSET): + self._thread_level = threading.local() + self.default_level = NOTSET + super(ThreadLocalLevelLogger, self).__init__(name, level) + + @property + def level(self): + try: + return self._thread_level.level + except AttributeError: + self._thread_level.level = self.default_level + return self.level + + @level.setter + def level(self, value): + self._thread_level.level = value + + def set_global_level(self, level): + """Set the level on the current thread + the default value for all + threads. 
+ """ + self.default_level = level + self.setLevel(level) + + +class BeetsLogger(ThreadLocalLevelLogger, StrFormatLogger): + pass + + +my_manager = copy(Logger.manager) +my_manager.loggerClass = BeetsLogger + + +def getLogger(name=None): # noqa + if name: + return my_manager.getLogger(name) + else: + return Logger.root diff --git a/lib/beets/mediafile.py b/lib/beets/mediafile.py old mode 100644 new mode 100755 index 49ef1037..13f1b2df --- a/lib/beets/mediafile.py +++ b/lib/beets/mediafile.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -32,32 +33,32 @@ Internally ``MediaFile`` uses ``MediaField`` descriptors to access the data from the tags. In turn ``MediaField`` uses a number of ``StorageStyle`` strategies to handle format specific logic. """ +from __future__ import division, absolute_import, print_function + import mutagen -import mutagen.mp3 -import mutagen.oggopus -import mutagen.oggvorbis +import mutagen.id3 import mutagen.mp4 import mutagen.flac -import mutagen.monkeysaudio import mutagen.asf -import mutagen.aiff + +import codecs import datetime import re import base64 +import binascii import math import struct import imghdr import os -import logging import traceback import enum - -from beets.util import displayable_path +import logging +import six __all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] -log = logging.getLogger('beets') +log = logging.getLogger(__name__) # Human-readable type names. TYPES = { @@ -72,16 +73,19 @@ TYPES = { 'mpc': 'Musepack', 'asf': 'Windows Media', 'aiff': 'AIFF', + 'dsf': 'DSD Stream File', } +PREFERRED_IMAGE_EXTENSIONS = {'jpeg': 'jpg'} + # Exceptions. class UnreadableFileError(Exception): """Mutagen is not able to extract information from the file. 
""" - def __init__(self, path): - Exception.__init__(self, displayable_path(path)) + def __init__(self, path, msg): + Exception.__init__(self, msg if msg else repr(path)) class FileTypeError(UnreadableFileError): @@ -91,11 +95,10 @@ class FileTypeError(UnreadableFileError): mutagen type is not supported by `Mediafile`. """ def __init__(self, path, mutagen_type=None): - path = displayable_path(path) if mutagen_type is None: - msg = path + msg = repr(path) else: - msg = u'{0}: of mutagen type {1}'.format(path, mutagen_type) + msg = u'{0}: of mutagen type {1}'.format(repr(path), mutagen_type) Exception.__init__(self, msg) @@ -103,10 +106,37 @@ class MutagenError(UnreadableFileError): """Raised when Mutagen fails unexpectedly---probably due to a bug. """ def __init__(self, path, mutagen_exc): - msg = u'{0}: {1}'.format(displayable_path(path), mutagen_exc) + msg = u'{0}: {1}'.format(repr(path), mutagen_exc) Exception.__init__(self, msg) +# Interacting with Mutagen. + +def mutagen_call(action, path, func, *args, **kwargs): + """Call a Mutagen function with appropriate error handling. + + `action` is a string describing what the function is trying to do, + and `path` is the relevant filename. The rest of the arguments + describe the callable to invoke. + + We require at least Mutagen 1.33, where `IOError` is *never* used, + neither for internal parsing errors *nor* for ordinary IO error + conditions such as a bad filename. Mutagen-specific parsing errors and IO + errors are reraised as `UnreadableFileError`. Other exceptions + raised inside Mutagen---i.e., bugs---are reraised as `MutagenError`. + """ + try: + return func(*args, **kwargs) + except mutagen.MutagenError as exc: + log.debug(u'%s failed: %s', action, six.text_type(exc)) + raise UnreadableFileError(path, six.text_type(exc)) + except Exception as exc: + # Isolate bugs in Mutagen. 
+ log.debug(u'%s', traceback.format_exc()) + log.error(u'uncaught Mutagen exception in %s: %s', action, exc) + raise MutagenError(path, exc) + + # Utility. def _safe_cast(out_type, val): @@ -124,8 +154,8 @@ def _safe_cast(out_type, val): return int(val) else: # Process any other type as a string. - if not isinstance(val, basestring): - val = unicode(val) + if not isinstance(val, six.string_types): + val = six.text_type(val) # Get a number from the front of the string. val = re.match(r'[0-9]*', val.strip()).group(0) if not val: @@ -140,21 +170,24 @@ def _safe_cast(out_type, val): except ValueError: return False - elif out_type == unicode: - if isinstance(val, str): - return val.decode('utf8', 'ignore') - elif isinstance(val, unicode): + elif out_type == six.text_type: + if isinstance(val, bytes): + return val.decode('utf-8', 'ignore') + elif isinstance(val, six.text_type): return val else: - return unicode(val) + return six.text_type(val) elif out_type == float: if isinstance(val, int) or isinstance(val, float): return float(val) else: - if not isinstance(val, basestring): - val = unicode(val) - match = re.match(r'[\+-]?[0-9\.]+', val.strip()) + if isinstance(val, bytes): + val = val.decode('utf-8', 'ignore') + else: + val = six.text_type(val) + match = re.match(r'[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)', + val.strip()) if match: val = match.group(0) if val: @@ -176,15 +209,15 @@ def _unpack_asf_image(data): of exceptions (out-of-bounds, etc.). We should clean this up sometime so that the failure modes are well-defined. 
""" - type, size = struct.unpack_from(" 0 else None + text_delimiter_index = frame.value.find(b'\x00') + if text_delimiter_index > 0: + comment = frame.value[0:text_delimiter_index] + comment = comment.decode('utf-8', 'replace') + else: + comment = None image_data = frame.value[text_delimiter_index + 1:] images.append(Image(data=image_data, type=cover_type, desc=comment)) @@ -1007,7 +1142,7 @@ class APEv2ImageStorageStyle(ListStorageStyle): for image in values: image_type = image.type or ImageType.other comment = image.desc or '' - image_data = comment + "\x00" + image.data + image_data = comment.encode('utf-8') + b'\x00' + image.data cover_tag = self.TAG_NAMES[image_type] mutagen_file[cover_tag] = image_data @@ -1041,7 +1176,7 @@ class MediaField(object): getting this property. """ - self.out_type = kwargs.get('out_type', unicode) + self.out_type = kwargs.get('out_type', six.text_type) self._styles = styles def styles(self, mutagen_file): @@ -1080,7 +1215,7 @@ class MediaField(object): return 0.0 elif self.out_type == bool: return False - elif self.out_type == unicode: + elif self.out_type == six.text_type: return u'' @@ -1161,9 +1296,9 @@ class DateField(MediaField): """ # Get the underlying data and split on hyphens and slashes. 
datestring = super(DateField, self).__get__(mediafile, None) - if isinstance(datestring, basestring): - datestring = re.sub(r'[Tt ].*$', '', unicode(datestring)) - items = re.split('[-/]', unicode(datestring)) + if isinstance(datestring, six.string_types): + datestring = re.sub(r'[Tt ].*$', '', six.text_type(datestring)) + items = re.split('[-/]', six.text_type(datestring)) else: items = [] @@ -1200,7 +1335,7 @@ class DateField(MediaField): date.append(u'{0:02d}'.format(int(month))) if month and day: date.append(u'{0:02d}'.format(int(day))) - date = map(unicode, date) + date = map(six.text_type, date) super(DateField, self).__set__(mediafile, u'-'.join(date)) if hasattr(self, '_year_field'): @@ -1238,18 +1373,31 @@ class DateItemField(MediaField): class CoverArtField(MediaField): """A descriptor that provides access to the *raw image data* for the - first image on a file. This is used for backwards compatibility: the + cover image on a file. This is used for backwards compatibility: the full `ImageListField` provides richer `Image` objects. + + When there are multiple images we try to pick the most likely to be a front + cover. 
""" def __init__(self): pass def __get__(self, mediafile, _): - try: - return mediafile.images[0].data - except IndexError: + candidates = mediafile.images + if candidates: + return self.guess_cover_image(candidates).data + else: return None + @staticmethod + def guess_cover_image(candidates): + if len(candidates) == 1: + return candidates[0] + try: + return next(c for c in candidates if c.type == ImageType.front) + except StopIteration: + return candidates[0] + def __set__(self, mediafile, data): if data: mediafile.images = [Image(data=data)] @@ -1297,37 +1445,7 @@ class MediaFile(object): """ self.path = path - unreadable_exc = ( - mutagen.mp3.error, - mutagen.id3.error, - mutagen.flac.error, - mutagen.monkeysaudio.MonkeysAudioHeaderError, - mutagen.mp4.error, - mutagen.oggopus.error, - mutagen.oggvorbis.error, - mutagen.ogg.error, - mutagen.asf.error, - mutagen.apev2.error, - mutagen.aiff.error, - ) - try: - self.mgfile = mutagen.File(path) - except unreadable_exc as exc: - log.debug(u'header parsing failed: {0}'.format(unicode(exc))) - raise UnreadableFileError(path) - except IOError as exc: - if type(exc) == IOError: - # This is a base IOError, not a subclass from Mutagen or - # anywhere else. - raise - else: - log.debug(traceback.format_exc()) - raise MutagenError(path, exc) - except Exception as exc: - # Isolate bugs in Mutagen. 
- log.debug(traceback.format_exc()) - log.error(u'uncaught Mutagen exception in open: {0}'.format(exc)) - raise MutagenError(path, exc) + self.mgfile = mutagen_call('open', path, mutagen.File, path) if self.mgfile is None: # Mutagen couldn't guess the type @@ -1335,20 +1453,10 @@ class MediaFile(object): elif (type(self.mgfile).__name__ == 'M4A' or type(self.mgfile).__name__ == 'MP4'): info = self.mgfile.info - if hasattr(info, 'codec'): - if info.codec and info.codec.startswith('alac'): - self.type = 'alac' - else: - self.type = 'aac' + if info.codec and info.codec.startswith('alac'): + self.type = 'alac' else: - # This hack differentiates AAC and ALAC on versions of - # Mutagen < 1.26. Once Mutagen > 1.26 is out and - # required by beets, we can remove this. - if hasattr(self.mgfile.info, 'bitrate') and \ - self.mgfile.info.bitrate > 0: - self.type = 'aac' - else: - self.type = 'alac' + self.type = 'aac' elif (type(self.mgfile).__name__ == 'ID3' or type(self.mgfile).__name__ == 'MP3'): self.type = 'mp3' @@ -1368,6 +1476,8 @@ class MediaFile(object): self.type = 'asf' elif type(self.mgfile).__name__ == 'AIFF': self.type = 'aiff' + elif type(self.mgfile).__name__ == 'DSF': + self.type = 'dsf' else: raise FileTypeError(path, type(self.mgfile).__name__) @@ -1379,7 +1489,8 @@ class MediaFile(object): self.id3v23 = id3v23 and self.type == 'mp3' def save(self): - """Write the object's tags back to the file. + """Write the object's tags back to the file. May + throw `UnreadableFileError`. """ # Possibly save the tags to ID3v2.3. kwargs = {} @@ -1391,27 +1502,13 @@ class MediaFile(object): id3.update_to_v23() kwargs['v2_version'] = 3 - # Isolate bugs in Mutagen. - try: - self.mgfile.save(**kwargs) - except (IOError, OSError): - # Propagate these through: they don't represent Mutagen bugs. 
- raise - except Exception as exc: - log.debug(traceback.format_exc()) - log.error(u'uncaught Mutagen exception in save: {0}'.format(exc)) - raise MutagenError(self.path, exc) + mutagen_call('save', self.path, self.mgfile.save, **kwargs) def delete(self): - """Remove the current metadata tag from the file. + """Remove the current metadata tag from the file. May + throw `UnreadableFileError`. """ - try: - self.mgfile.delete() - except NotImplementedError: - # For Mutagen types that don't support deletion (notably, - # ASF), just delete each tag individually. - for tag in self.mgfile.keys(): - del self.mgfile[tag] + mutagen_call('delete', self.path, self.mgfile.delete) # Convenient access to the set of available fields. @@ -1423,7 +1520,40 @@ class MediaFile(object): """ for property, descriptor in cls.__dict__.items(): if isinstance(descriptor, MediaField): - yield property + if isinstance(property, bytes): + # On Python 2, class field names are bytes. This method + # produces text strings. + yield property.decode('utf8', 'ignore') + else: + yield property + + @classmethod + def _field_sort_name(cls, name): + """Get a sort key for a field name that determines the order + fields should be written in. + + Fields names are kept unchanged, unless they are instances of + :class:`DateItemField`, in which case `year`, `month`, and `day` + are replaced by `date0`, `date1`, and `date2`, respectively, to + make them appear in that order. + """ + if isinstance(cls.__dict__[name], DateItemField): + name = re.sub('year', 'date0', name) + name = re.sub('month', 'date1', name) + name = re.sub('day', 'date2', name) + return name + + @classmethod + def sorted_fields(cls): + """Get the names of all writable metadata fields, sorted in the + order that they should be written. + + This is a lexicographic order, except for instances of + :class:`DateItemField`, which are sorted in year-month-day + order. 
+ """ + for property in sorted(cls.fields(), key=cls._field_sort_name): + yield property @classmethod def readable_fields(cls): @@ -1461,7 +1591,7 @@ class MediaFile(object): the `MediaFile`. If a key has the value `None`, the corresponding property is deleted from the `MediaFile`. """ - for field in self.fields(): + for field in self.sorted_fields(): if field in dict: if dict[field] is None: delattr(self, field) @@ -1472,39 +1602,52 @@ class MediaFile(object): title = MediaField( MP3StorageStyle('TIT2'), - MP4StorageStyle("\xa9nam"), + MP4StorageStyle('\xa9nam'), StorageStyle('TITLE'), ASFStorageStyle('Title'), ) artist = MediaField( MP3StorageStyle('TPE1'), - MP4StorageStyle("\xa9ART"), + MP4StorageStyle('\xa9ART'), StorageStyle('ARTIST'), ASFStorageStyle('Author'), ) album = MediaField( MP3StorageStyle('TALB'), - MP4StorageStyle("\xa9alb"), + MP4StorageStyle('\xa9alb'), StorageStyle('ALBUM'), ASFStorageStyle('WM/AlbumTitle'), ) genres = ListMediaField( MP3ListStorageStyle('TCON'), - MP4ListStorageStyle("\xa9gen"), + MP4ListStorageStyle('\xa9gen'), ListStorageStyle('GENRE'), ASFStorageStyle('WM/Genre'), ) genre = genres.single_field() + lyricist = MediaField( + MP3StorageStyle('TEXT'), + MP4StorageStyle('----:com.apple.iTunes:LYRICIST'), + StorageStyle('LYRICIST'), + ASFStorageStyle('WM/Writer'), + ) composer = MediaField( MP3StorageStyle('TCOM'), - MP4StorageStyle("\xa9wrt"), + MP4StorageStyle('\xa9wrt'), StorageStyle('COMPOSER'), ASFStorageStyle('WM/Composer'), ) + arranger = MediaField( + MP3PeopleStorageStyle('TIPL', involvement='arranger'), + MP4StorageStyle('----:com.apple.iTunes:Arranger'), + StorageStyle('ARRANGER'), + ASFStorageStyle('beets/Arranger'), + ) + grouping = MediaField( MP3StorageStyle('TIT1'), - MP4StorageStyle("\xa9grp"), + MP4StorageStyle('\xa9grp'), StorageStyle('GROUPING'), ASFStorageStyle('WM/ContentGroupDescription'), ) @@ -1544,13 +1687,13 @@ class MediaFile(object): ) lyrics = MediaField( MP3DescStorageStyle(key='USLT'), - 
MP4StorageStyle("\xa9lyr"), + MP4StorageStyle('\xa9lyr'), StorageStyle('LYRICS'), ASFStorageStyle('WM/Lyrics'), ) comments = MediaField( MP3DescStorageStyle(key='COMM'), - MP4StorageStyle("\xa9cmt"), + MP4StorageStyle('\xa9cmt'), StorageStyle('DESCRIPTION'), StorageStyle('COMMENT'), ASFStorageStyle('WM/Comments'), @@ -1593,76 +1736,76 @@ class MediaFile(object): ) artist_sort = MediaField( MP3StorageStyle('TSOP'), - MP4StorageStyle("soar"), + MP4StorageStyle('soar'), StorageStyle('ARTISTSORT'), ASFStorageStyle('WM/ArtistSortOrder'), ) albumartist_sort = MediaField( MP3DescStorageStyle(u'ALBUMARTISTSORT'), - MP4StorageStyle("soaa"), + MP4StorageStyle('soaa'), StorageStyle('ALBUMARTISTSORT'), ASFStorageStyle('WM/AlbumArtistSortOrder'), ) asin = MediaField( MP3DescStorageStyle(u'ASIN'), - MP4StorageStyle("----:com.apple.iTunes:ASIN"), + MP4StorageStyle('----:com.apple.iTunes:ASIN'), StorageStyle('ASIN'), ASFStorageStyle('MusicBrainz/ASIN'), ) catalognum = MediaField( MP3DescStorageStyle(u'CATALOGNUMBER'), - MP4StorageStyle("----:com.apple.iTunes:CATALOGNUMBER"), + MP4StorageStyle('----:com.apple.iTunes:CATALOGNUMBER'), StorageStyle('CATALOGNUMBER'), ASFStorageStyle('WM/CatalogNo'), ) disctitle = MediaField( MP3StorageStyle('TSST'), - MP4StorageStyle("----:com.apple.iTunes:DISCSUBTITLE"), + MP4StorageStyle('----:com.apple.iTunes:DISCSUBTITLE'), StorageStyle('DISCSUBTITLE'), ASFStorageStyle('WM/SetSubTitle'), ) encoder = MediaField( MP3StorageStyle('TENC'), - MP4StorageStyle("\xa9too"), + MP4StorageStyle('\xa9too'), StorageStyle('ENCODEDBY'), StorageStyle('ENCODER'), ASFStorageStyle('WM/EncodedBy'), ) script = MediaField( MP3DescStorageStyle(u'Script'), - MP4StorageStyle("----:com.apple.iTunes:SCRIPT"), + MP4StorageStyle('----:com.apple.iTunes:SCRIPT'), StorageStyle('SCRIPT'), ASFStorageStyle('WM/Script'), ) language = MediaField( MP3StorageStyle('TLAN'), - MP4StorageStyle("----:com.apple.iTunes:LANGUAGE"), + MP4StorageStyle('----:com.apple.iTunes:LANGUAGE'), 
StorageStyle('LANGUAGE'), ASFStorageStyle('WM/Language'), ) country = MediaField( - MP3DescStorageStyle('MusicBrainz Album Release Country'), - MP4StorageStyle("----:com.apple.iTunes:MusicBrainz " - "Album Release Country"), + MP3DescStorageStyle(u'MusicBrainz Album Release Country'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz ' + 'Album Release Country'), StorageStyle('RELEASECOUNTRY'), ASFStorageStyle('MusicBrainz/Album Release Country'), ) albumstatus = MediaField( MP3DescStorageStyle(u'MusicBrainz Album Status'), - MP4StorageStyle("----:com.apple.iTunes:MusicBrainz Album Status"), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Status'), StorageStyle('MUSICBRAINZ_ALBUMSTATUS'), ASFStorageStyle('MusicBrainz/Album Status'), ) media = MediaField( MP3StorageStyle('TMED'), - MP4StorageStyle("----:com.apple.iTunes:MEDIA"), + MP4StorageStyle('----:com.apple.iTunes:MEDIA'), StorageStyle('MEDIA'), ASFStorageStyle('WM/Media'), ) albumdisambig = MediaField( # This tag mapping was invented for beets (not used by Picard, etc). MP3DescStorageStyle(u'MusicBrainz Album Comment'), - MP4StorageStyle("----:com.apple.iTunes:MusicBrainz Album Comment"), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Comment'), StorageStyle('MUSICBRAINZ_ALBUMCOMMENT'), ASFStorageStyle('MusicBrainz/Album Comment'), ) @@ -1670,7 +1813,7 @@ class MediaFile(object): # Release date. date = DateField( MP3StorageStyle('TDRC'), - MP4StorageStyle("\xa9day"), + MP4StorageStyle('\xa9day'), StorageStyle('DATE'), ASFStorageStyle('WM/Year'), year=(StorageStyle('YEAR'),)) @@ -1693,13 +1836,13 @@ class MediaFile(object): # Nonstandard metadata. 
artist_credit = MediaField( MP3DescStorageStyle(u'Artist Credit'), - MP4StorageStyle("----:com.apple.iTunes:Artist Credit"), + MP4StorageStyle('----:com.apple.iTunes:Artist Credit'), StorageStyle('ARTIST_CREDIT'), ASFStorageStyle('beets/Artist Credit'), ) albumartist_credit = MediaField( MP3DescStorageStyle(u'Album Artist Credit'), - MP4StorageStyle("----:com.apple.iTunes:Album Artist Credit"), + MP4StorageStyle('----:com.apple.iTunes:Album Artist Credit'), StorageStyle('ALBUMARTIST_CREDIT'), ASFStorageStyle('beets/Album Artist Credit'), ) @@ -1773,7 +1916,7 @@ class MediaFile(object): ), MP4StorageStyle( '----:com.apple.iTunes:replaygain_track_gain', - float_places=2, suffix=b' dB' + float_places=2, suffix=' dB' ), MP4SoundCheckStorageStyle( '----:com.apple.iTunes:iTunNORM', @@ -1798,9 +1941,9 @@ class MediaFile(object): u'replaygain_album_gain', float_places=2, suffix=u' dB' ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=1 + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_album_gain', + float_places=2, suffix=' dB' ), StorageStyle( u'REPLAYGAIN_ALBUM_GAIN', @@ -1891,13 +2034,6 @@ class MediaFile(object): @property def channels(self): """The number of channels in the audio (an int).""" - if isinstance(self.mgfile.info, mutagen.mp3.MPEGInfo): - return { - mutagen.mp3.STEREO: 2, - mutagen.mp3.JOINTSTEREO: 2, - mutagen.mp3.DUALCHANNEL: 2, - mutagen.mp3.MONO: 1, - }[self.mgfile.info.mode] if hasattr(self.mgfile.info, 'channels'): return self.mgfile.info.channels return 0 diff --git a/lib/beets/plugins.py b/lib/beets/plugins.py index 8611b92a..2ecdb847 100755 --- a/lib/beets/plugins.py +++ b/lib/beets/plugins.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. 
# # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,15 +15,19 @@ """Support for beets plugins.""" -import logging -import traceback +from __future__ import division, absolute_import, print_function + import inspect +import traceback import re from collections import defaultdict +from functools import wraps import beets +from beets import logging from beets import mediafile +import six PLUGIN_NAMESPACE = 'beetsplug' @@ -41,6 +46,23 @@ class PluginConflictException(Exception): """ +class PluginLogFilter(logging.Filter): + """A logging filter that identifies the plugin that emitted a log + message. + """ + def __init__(self, plugin): + self.prefix = u'{0}: '.format(plugin.name) + + def filter(self, record): + if hasattr(record.msg, 'msg') and isinstance(record.msg.msg, + six.string_types): + # A _LogMessage from our hacked-up Logging replacement. + record.msg.msg = self.prefix + record.msg.msg + elif isinstance(record.msg, six.string_types): + record.msg = self.prefix + record.msg + return True + + # Managing the plugins themselves. class BeetsPlugin(object): @@ -51,7 +73,6 @@ class BeetsPlugin(object): def __init__(self, name=None): """Perform one-time plugin setup. """ - self.import_stages = [] self.name = name or self.__module__.split('.')[-1] self.config = beets.config[self.name] if not self.template_funcs: @@ -60,6 +81,12 @@ class BeetsPlugin(object): self.template_fields = {} if not self.album_template_fields: self.album_template_fields = {} + self.import_stages = [] + + self._log = log.getChild(self.name) + self._log.setLevel(logging.NOTSET) # Use `beets` logger level. 
+ if not any(isinstance(f, PluginLogFilter) for f in self._log.filters): + self._log.addFilter(PluginLogFilter(self)) def commands(self): """Should return a list of beets.ui.Subcommand objects for @@ -67,6 +94,46 @@ class BeetsPlugin(object): """ return () + def get_import_stages(self): + """Return a list of functions that should be called as importer + pipelines stages. + + The callables are wrapped versions of the functions in + `self.import_stages`. Wrapping provides some bookkeeping for the + plugin: specifically, the logging level is adjusted to WARNING. + """ + return [self._set_log_level_and_params(logging.WARNING, import_stage) + for import_stage in self.import_stages] + + def _set_log_level_and_params(self, base_log_level, func): + """Wrap `func` to temporarily set this plugin's logger level to + `base_log_level` + config options (and restore it to its previous + value after the function returns). Also determines which params may not + be sent for backwards-compatibility. + """ + argspec = inspect.getargspec(func) + + @wraps(func) + def wrapper(*args, **kwargs): + assert self._log.level == logging.NOTSET + verbosity = beets.config['verbose'].get(int) + log_level = max(logging.DEBUG, base_log_level - 10 * verbosity) + self._log.setLevel(log_level) + try: + try: + return func(*args, **kwargs) + except TypeError as exc: + if exc.args[0].startswith(func.__name__): + # caused by 'func' and not stuff internal to 'func' + kwargs = dict((arg, val) for arg, val in kwargs.items() + if arg in argspec.args) + return func(*args, **kwargs) + else: + raise + finally: + self._log.setLevel(logging.NOTSET) + return wrapper + def queries(self): """Should return a dict mapping prefixes to Query subclasses. 
""" @@ -123,37 +190,21 @@ class BeetsPlugin(object): mediafile.MediaFile.add_field(name, descriptor) library.Item._media_fields.add(name) + _raw_listeners = None listeners = None - @classmethod - def register_listener(cls, event, func): - """Add a function as a listener for the specified event. (An - imperative alternative to the @listen decorator.) + def register_listener(self, event, func): + """Add a function as a listener for the specified event. """ - if cls.listeners is None: + wrapped_func = self._set_log_level_and_params(logging.WARNING, func) + + cls = self.__class__ + if cls.listeners is None or cls._raw_listeners is None: + cls._raw_listeners = defaultdict(list) cls.listeners = defaultdict(list) - cls.listeners[event].append(func) - - @classmethod - def listen(cls, event): - """Decorator that adds a function as an event handler for the - specified event (as a string). The parameters passed to function - will vary depending on what event occurred. - - The function should respond to named parameters. - function(**kwargs) will trap all arguments in a dictionary. - Example: - - >>> @MyPlugin.listen("imported") - >>> def importListener(**kwargs): - ... pass - """ - def helper(func): - if cls.listeners is None: - cls.listeners = defaultdict(list) - cls.listeners[event].append(func) - return func - return helper + if func not in cls._raw_listeners[event]: + cls._raw_listeners[event].append(func) + cls.listeners[event].append(wrapped_func) template_funcs = None template_fields = None @@ -197,14 +248,14 @@ def load_plugins(names=()): BeetsPlugin subclasses desired. 
""" for name in names: - modname = '%s.%s' % (PLUGIN_NAMESPACE, name) + modname = '{0}.{1}'.format(PLUGIN_NAMESPACE, name) try: try: namespace = __import__(modname, None, None) except ImportError as exc: # Again, this is hacky: if exc.args[0].endswith(' ' + name): - log.warn(u'** plugin {0} not found'.format(name)) + log.warning(u'** plugin {0} not found', name) else: raise else: @@ -214,8 +265,11 @@ def load_plugins(names=()): _classes.add(obj) except: - log.warn(u'** error loading plugin {0}'.format(name)) - log.warn(traceback.format_exc()) + log.warning( + u'** error loading plugin {}:\n{}', + name, + traceback.format_exc(), + ) _instances = {} @@ -267,8 +321,8 @@ def types(model_cls): if field in types and plugin_types[field] != types[field]: raise PluginConflictException( u'Plugin {0} defines flexible field {1} ' - 'which has already been defined with ' - 'another type.'.format(plugin.name, field) + u'which has already been defined with ' + u'another type.'.format(plugin.name, field) ) types.update(plugin_types) return types @@ -297,41 +351,35 @@ def album_distance(items, album_info, mapping): def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. """ - out = [] for plugin in find_plugins(): - out.extend(plugin.candidates(items, artist, album, va_likely)) - return out + for candidate in plugin.candidates(items, artist, album, va_likely): + yield candidate def item_candidates(item, artist, title): """Gets MusicBrainz candidates for an item from the plugins. """ - out = [] for plugin in find_plugins(): - out.extend(plugin.item_candidates(item, artist, title)) - return out + for item_candidate in plugin.item_candidates(item, artist, title): + yield item_candidate def album_for_id(album_id): """Get AlbumInfo objects for a given ID string. 
""" - out = [] for plugin in find_plugins(): - res = plugin.album_for_id(album_id) - if res: - out.append(res) - return out + album = plugin.album_for_id(album_id) + if album: + yield album def track_for_id(track_id): """Get TrackInfo objects for a given ID string. """ - out = [] for plugin in find_plugins(): - res = plugin.track_for_id(track_id) - if res: - out.append(res) - return out + track = plugin.track_for_id(track_id) + if track: + yield track def template_funcs(): @@ -349,8 +397,7 @@ def import_stages(): """Get a list of import stage functions defined by plugins.""" stages = [] for plugin in find_plugins(): - if hasattr(plugin, 'import_stages'): - stages += plugin.import_stages + stages += plugin.get_import_stages() return stages @@ -392,18 +439,20 @@ def event_handlers(): def send(event, **arguments): - """Sends an event to all assigned event listeners. Event is the - name of the event to send, all other named arguments go to the - event handler(s). + """Send an event to all assigned event listeners. - Returns a list of return values from the handlers. + `event` is the name of the event to send, all other named arguments + are passed along to the handlers. + + Return a list of non-None values returned from the handlers. """ - log.debug(u'Sending event: {0}'.format(event)) + log.debug(u'Sending event: {0}', event) + results = [] for handler in event_handlers()[event]: - # Don't break legacy plugins if we want to pass more arguments - argspec = inspect.getargspec(handler).args - args = dict((k, v) for k, v in arguments.items() if k in argspec) - handler(**args) + result = handler(**arguments) + if result is not None: + results.append(result) + return results def feat_tokens(for_artist=True): @@ -433,3 +482,19 @@ def sanitize_choices(choices, choices_all): if not (s in seen or seen.add(s)): res.extend(list(others) if s == '*' else [s]) return res + + +def notify_info_yielded(event): + """Makes a generator send the event 'event' every time it yields. 
+ This decorator is supposed to decorate a generator, but any function + returning an iterable should work. + Each yielded value is passed to plugins using the 'info' parameter of + 'send'. + """ + def decorator(generator): + def decorated(*args, **kwargs): + for v in generator(*args, **kwargs): + send(event, info=v) + yield v + return decorated + return decorator diff --git a/lib/beets/ui/__init__.py b/lib/beets/ui/__init__.py old mode 100644 new mode 100755 index 8978ff54..df370b52 --- a/lib/beets/ui/__init__.py +++ b/lib/beets/ui/__init__.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -16,28 +17,31 @@ interface. To invoke the CLI, just call beets.ui.main(). The actual CLI commands are implemented in the ui.commands module. """ -from __future__ import print_function -import locale +from __future__ import division, absolute_import, print_function + import optparse import textwrap import sys from difflib import SequenceMatcher -import logging import sqlite3 import errno import re import struct import traceback import os.path +from six.moves import input +from beets import logging from beets import library from beets import plugins from beets import util from beets.util.functemplate import Template from beets import config -from beets.util import confit +from beets.util import confit, as_string from beets.autotag import mb +from beets.dbcore import query as db_query +import six # On Windows platforms, use colorama to support "ANSI" terminal colors. if sys.platform == 'win32': @@ -56,8 +60,8 @@ log.propagate = False # Don't propagate to root handler. 
PF_KEY_QUERIES = { - 'comp': 'comp:true', - 'singleton': 'singleton:true', + 'comp': u'comp:true', + 'singleton': u'singleton:true', } @@ -67,68 +71,149 @@ class UserError(Exception): """ -# Utilities. +# Encoding utilities. -def _encoding(): - """Tries to guess the encoding used by the terminal.""" + +def _in_encoding(): + """Get the encoding to use for *inputting* strings from the console. + """ + return _stream_encoding(sys.stdin) + + +def _out_encoding(): + """Get the encoding to use for *outputting* strings to the console. + """ + return _stream_encoding(sys.stdout) + + +def _stream_encoding(stream, default='utf-8'): + """A helper for `_in_encoding` and `_out_encoding`: get the stream's + preferred encoding, using a configured override or a default + fallback if neither is not specified. + """ # Configured override? encoding = config['terminal_encoding'].get() if encoding: return encoding - # Determine from locale settings. - try: - return locale.getdefaultlocale()[1] or 'utf8' - except ValueError: - # Invalid locale environment variable setting. To avoid - # failing entirely for no good reason, assume UTF-8. - return 'utf8' + # For testing: When sys.stdout or sys.stdin is a StringIO under the + # test harness, it doesn't have an `encoding` attribute. Just use + # UTF-8. + if not hasattr(stream, 'encoding'): + return default + + # Python's guessed output stream encoding, or UTF-8 as a fallback + # (e.g., when piped to a file). + return stream.encoding or default def decargs(arglist): """Given a list of command-line argument bytestrings, attempts to - decode them to Unicode strings. + decode them to Unicode strings when running under Python 2. 
""" - return [s.decode(_encoding()) for s in arglist] + if six.PY2: + return [s.decode(util.arg_encoding()) for s in arglist] + else: + return arglist -def print_(*strings): +def print_(*strings, **kwargs): """Like print, but rather than raising an error when a character is not in the terminal's encoding's character set, just silently replaces it. - """ - if strings: - if isinstance(strings[0], unicode): - txt = u' '.join(strings) - else: - txt = ' '.join(strings) - else: - txt = u'' - if isinstance(txt, unicode): - txt = txt.encode(_encoding(), 'replace') - print(txt) + The arguments must be Unicode strings: `unicode` on Python 2; `str` on + Python 3. + + The `end` keyword argument behaves similarly to the built-in `print` + (it defaults to a newline). + """ + if not strings: + strings = [u''] + assert isinstance(strings[0], six.text_type) + + txt = u' '.join(strings) + txt += kwargs.get('end', u'\n') + + # Encode the string and write it to stdout. + if six.PY2: + # On Python 2, sys.stdout expects bytes. + out = txt.encode(_out_encoding(), 'replace') + sys.stdout.write(out) + else: + # On Python 3, sys.stdout expects text strings and uses the + # exception-throwing encoding error policy. To avoid throwing + # errors and use our configurable encoding override, we use the + # underlying bytes buffer instead. + if hasattr(sys.stdout, 'buffer'): + out = txt.encode(_out_encoding(), 'replace') + sys.stdout.buffer.write(out) + else: + # In our test harnesses (e.g., DummyOut), sys.stdout.buffer + # does not exist. We instead just record the text string. + sys.stdout.write(txt) + + +# Configuration wrappers. + +def _bool_fallback(a, b): + """Given a boolean or None, return the original value or a fallback. 
+ """ + if a is None: + assert isinstance(b, bool) + return b + else: + assert isinstance(a, bool) + return a + + +def should_write(write_opt=None): + """Decide whether a command that updates metadata should also write + tags, using the importer configuration as the default. + """ + return _bool_fallback(write_opt, config['import']['write'].get(bool)) + + +def should_move(move_opt=None): + """Decide whether a command that updates metadata should also move + files when they're inside the library, using the importer + configuration as the default. + + Specifically, commands should move files after metadata updates only + when the importer is configured *either* to move *or* to copy files. + They should avoid moving files when the importer is configured not + to touch any filenames. + """ + return _bool_fallback( + move_opt, + config['import']['move'].get(bool) or + config['import']['copy'].get(bool) + ) + + +# Input prompts. def input_(prompt=None): - """Like `raw_input`, but decodes the result to a Unicode string. + """Like `input`, but decodes the result to a Unicode string. Raises a UserError if stdin is not available. The prompt is sent to stdout rather than stderr. A printed between the prompt and the input cursor. """ # raw_input incorrectly sends prompts to stderr, not stdout, so we - # use print() explicitly to display prompts. + # use print_() explicitly to display prompts. 
# http://bugs.python.org/issue1927 if prompt: - if isinstance(prompt, unicode): - prompt = prompt.encode(_encoding(), 'replace') - print(prompt, end=' ') + print_(prompt, end=u' ') try: - resp = raw_input() + resp = input() except EOFError: - raise UserError('stdin stream ended while input required') + raise UserError(u'stdin stream ended while input required') - return resp.decode(sys.stdin.encoding or 'utf8', 'ignore') + if six.PY2: + return resp.decode(_in_encoding(), 'ignore') + else: + return resp def input_options(options, require=False, prompt=None, fallback_prompt=None, @@ -172,7 +257,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, found_letter = letter break else: - raise ValueError('no unambiguous lettering found') + raise ValueError(u'no unambiguous lettering found') letters[found_letter.lower()] = option index = option.index(found_letter) @@ -180,7 +265,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Mark the option's shortcut letter for display. if not require and ( (default is None and not numrange and first) or - (isinstance(default, basestring) and + (isinstance(default, six.string_types) and found_letter.lower() == default.lower())): # The first option is the default; mark it. show_letter = '[%s]' % found_letter.upper() @@ -190,7 +275,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, is_default = False # Colorize the letter shortcut. - show_letter = colorize('turquoise' if is_default else 'blue', + show_letter = colorize('action_default' if is_default else 'action', show_letter) # Insert the highlighted letter back into the word. 
@@ -216,11 +301,11 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, prompt_part_lengths = [] if numrange: if isinstance(default, int): - default_name = str(default) - default_name = colorize('turquoise', default_name) + default_name = six.text_type(default) + default_name = colorize('action_default', default_name) tmpl = '# selection (default %s)' prompt_parts.append(tmpl % default_name) - prompt_part_lengths.append(len(tmpl % str(default))) + prompt_part_lengths.append(len(tmpl % six.text_type(default))) else: prompt_parts.append('# selection') prompt_part_lengths.append(len(prompt_parts[-1])) @@ -255,9 +340,9 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Make a fallback prompt too. This is displayed if the user enters # something that is not recognized. if not fallback_prompt: - fallback_prompt = 'Enter one of ' + fallback_prompt = u'Enter one of ' if numrange: - fallback_prompt += '%i-%i, ' % numrange + fallback_prompt += u'%i-%i, ' % numrange fallback_prompt += ', '.join(display_letters) + ':' resp = input_(prompt) @@ -296,19 +381,52 @@ def input_yn(prompt, require=False): "yes" unless `require` is `True`, in which case there is no default. """ sel = input_options( - ('y', 'n'), require, prompt, 'Enter Y or N:' + ('y', 'n'), require, prompt, u'Enter Y or N:' ) - return sel == 'y' + return sel == u'y' +def input_select_objects(prompt, objs, rep): + """Prompt to user to choose all, none, or some of the given objects. + Return the list of selected objects. + + `prompt` is the prompt string to use for each question (it should be + phrased as an imperative verb). `rep` is a function to call on each + object to print it out when confirming objects individually. + """ + choice = input_options( + (u'y', u'n', u's'), False, + u'%s? (Yes/no/select)' % prompt) + print() # Blank line. + + if choice == u'y': # Yes. + return objs + + elif choice == u's': # Select. 
+ out = [] + for obj in objs: + rep(obj) + if input_yn(u'%s? (yes/no)' % prompt, True): + out.append(obj) + print() # go to a new line + return out + + else: # No. + return [] + + +# Human output formatting. + def human_bytes(size): """Formats size, a number of bytes, in a human-readable way.""" - suffices = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB', 'HB'] - for suffix in suffices: + powers = [u'', u'K', u'M', u'G', u'T', u'P', u'E', u'Z', u'Y', u'H'] + unit = 'B' + for power in powers: if size < 1024: - return "%3.1f %s" % (size, suffix) + return u"%3.1f %s%s" % (size, power, unit) size /= 1024.0 - return "big" + unit = u'iB' + return u"big" def human_seconds(interval): @@ -316,13 +434,13 @@ def human_seconds(interval): interval using English words. """ units = [ - (1, 'second'), - (60, 'minute'), - (60, 'hour'), - (24, 'day'), - (7, 'week'), - (52, 'year'), - (10, 'decade'), + (1, u'second'), + (60, u'minute'), + (60, u'hour'), + (24, u'day'), + (7, u'week'), + (52, u'year'), + (10, u'decade'), ] for i in range(len(units) - 1): increment, suffix = units[i] @@ -335,7 +453,7 @@ def human_seconds(interval): increment, suffix = units[-1] interval /= float(increment) - return "%3.1f %ss" % (interval, suffix) + return u"%3.1f %ss" % (interval, suffix) def human_seconds_short(interval): @@ -346,16 +464,45 @@ def human_seconds_short(interval): return u'%i:%02i' % (interval // 60, interval % 60) +# Colorization. + # ANSI terminal colorization code heavily inspired by pygments: # http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py # (pygments is by Tim Hatch, Armin Ronacher, et al.) 
COLOR_ESCAPE = "\x1b[" -DARK_COLORS = ["black", "darkred", "darkgreen", "brown", "darkblue", - "purple", "teal", "lightgray"] -LIGHT_COLORS = ["darkgray", "red", "green", "yellow", "blue", - "fuchsia", "turquoise", "white"] +DARK_COLORS = { + "black": 0, + "darkred": 1, + "darkgreen": 2, + "brown": 3, + "darkyellow": 3, + "darkblue": 4, + "purple": 5, + "darkmagenta": 5, + "teal": 6, + "darkcyan": 6, + "lightgray": 7 +} +LIGHT_COLORS = { + "darkgray": 0, + "red": 1, + "green": 2, + "yellow": 3, + "blue": 4, + "fuchsia": 5, + "magenta": 5, + "turquoise": 6, + "cyan": 6, + "white": 7 +} RESET_COLOR = COLOR_ESCAPE + "39;49;00m" +# These abstract COLOR_NAMES are lazily mapped on to the actual color in COLORS +# as they are defined in the configuration files, see function: colorize +COLOR_NAMES = ['text_success', 'text_warning', 'text_error', 'text_highlight', + 'text_highlight_minor', 'action_default', 'action'] +COLORS = None + def _colorize(color, text): """Returns a string that prints the given text in the given color @@ -363,34 +510,47 @@ def _colorize(color, text): in DARK_COLORS or LIGHT_COLORS. """ if color in DARK_COLORS: - escape = COLOR_ESCAPE + "%im" % (DARK_COLORS.index(color) + 30) + escape = COLOR_ESCAPE + "%im" % (DARK_COLORS[color] + 30) elif color in LIGHT_COLORS: - escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS.index(color) + 30) + escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS[color] + 30) else: - raise ValueError('no such color %s', color) + raise ValueError(u'no such color %s', color) return escape + text + RESET_COLOR -def colorize(color, text): +def colorize(color_name, text): """Colorize text if colored output is enabled. (Like _colorize but conditional.) 
""" - if config['color']: + if config['ui']['color']: + global COLORS + if not COLORS: + COLORS = dict((name, + config['ui']['colors'][name].as_str()) + for name in COLOR_NAMES) + # In case a 3rd party plugin is still passing the actual color ('red') + # instead of the abstract color name ('text_error') + color = COLORS.get(color_name) + if not color: + log.debug(u'Invalid color_name: {0}', color_name) + color = color_name return _colorize(color, text) else: return text -def _colordiff(a, b, highlight='red', minor_highlight='lightgray'): +def _colordiff(a, b, highlight='text_highlight', + minor_highlight='text_highlight_minor'): """Given two values, return the same pair of strings except with their differences highlighted in the specified color. Strings are highlighted intelligently to show differences; other values are stringified and highlighted in their entirety. """ - if not isinstance(a, basestring) or not isinstance(b, basestring): + if not isinstance(a, six.string_types) \ + or not isinstance(b, six.string_types): # Non-strings: use ordinary equality. - a = unicode(a) - b = unicode(b) + a = six.text_type(a) + b = six.text_type(b) if a == b: return a, b else: @@ -431,14 +591,14 @@ def _colordiff(a, b, highlight='red', minor_highlight='lightgray'): return u''.join(a_out), u''.join(b_out) -def colordiff(a, b, highlight='red'): +def colordiff(a, b, highlight='text_highlight'): """Colorize differences between two values if color is enabled. (Like _colordiff but conditional.) """ - if config['color']: + if config['ui']['color']: return _colordiff(a, b, highlight) else: - return unicode(a), unicode(b) + return six.text_type(a), six.text_type(b) def get_path_formats(subview=None): @@ -449,7 +609,7 @@ def get_path_formats(subview=None): subview = subview or config['paths'] for query, view in subview.items(): query = PF_KEY_QUERIES.get(query, query) # Expand common queries. 
- path_formats.append((query, Template(view.get(unicode)))) + path_formats.append((query, Template(view.as_str()))) return path_formats @@ -470,31 +630,6 @@ def get_replacements(): return replacements -def _pick_format(album, fmt=None): - """Pick a format string for printing Album or Item objects, - falling back to config options and defaults. - """ - if fmt: - return fmt - if album: - return config['list_format_album'].get(unicode) - else: - return config['list_format_item'].get(unicode) - - -def print_obj(obj, lib, fmt=None): - """Print an Album or Item object. If `fmt` is specified, use that - format string. Otherwise, use the configured template. - """ - album = isinstance(obj, library.Album) - fmt = _pick_format(album, fmt) - if isinstance(fmt, Template): - template = fmt - else: - template = Template(fmt) - print_(obj.evaluate_template(template)) - - def term_width(): """Get the width (columns) of the terminal.""" fallback = config['ui']['terminal_width'].get(int) @@ -542,10 +677,11 @@ def _field_diff(field, old, new): # For strings, highlight changes. For others, colorize the whole # thing. - if isinstance(oldval, basestring): + if isinstance(oldval, six.string_types): oldstr, newstr = colordiff(oldval, newstr) else: - oldstr, newstr = colorize('red', oldstr), colorize('red', newstr) + oldstr = colorize('text_error', oldstr) + newstr = colorize('text_error', newstr) return u'{0} -> {1}'.format(oldstr, newstr) @@ -581,18 +717,178 @@ def show_model_changes(new, old=None, fields=None, always=False): changes.append(u' {0}: {1}'.format( field, - colorize('red', new.formatted()[field]) + colorize('text_highlight', new.formatted()[field]) )) # Print changes. if changes or always: - print_obj(old, old._db) + print_(format(old)) if changes: print_(u'\n'.join(changes)) return bool(changes) +def show_path_changes(path_changes): + """Given a list of tuples (source, destination) that indicate the + path changes, log the changes as INFO-level output to the beets log. 
+ The output is guaranteed to be unicode. + + Every pair is shown on a single line if the terminal width permits it, + else it is split over two lines. E.g., + + Source -> Destination + + vs. + + Source + -> Destination + """ + sources, destinations = zip(*path_changes) + + # Ensure unicode output + sources = list(map(util.displayable_path, sources)) + destinations = list(map(util.displayable_path, destinations)) + + # Calculate widths for terminal split + col_width = (term_width() - len(' -> ')) // 2 + max_width = len(max(sources + destinations, key=len)) + + if max_width > col_width: + # Print every change over two lines + for source, dest in zip(sources, destinations): + log.info(u'{0} \n -> {1}', source, dest) + else: + # Print every change on a single line, and add a header + title_pad = max_width - len('Source ') + len(' -> ') + + log.info(u'Source {0} Destination', ' ' * title_pad) + for source, dest in zip(sources, destinations): + pad = max_width - len(source) + log.info(u'{0} {1} -> {2}', source, ' ' * pad, dest) + + +class CommonOptionsParser(optparse.OptionParser, object): + """Offers a simple way to add common formatting options. + + Options available include: + - matching albums instead of tracks: add_album_option() + - showing paths instead of items/albums: add_path_option() + - changing the format of displayed items/albums: add_format_option() + + The last one can have several behaviors: + - against a special target + - with a certain format + - autodetected target with the album option + + Each method is fully documented in the related method. 
+ """ + def __init__(self, *args, **kwargs): + super(CommonOptionsParser, self).__init__(*args, **kwargs) + self._album_flags = False + # this serves both as an indicator that we offer the feature AND allows + # us to check whether it has been specified on the CLI - bypassing the + # fact that arguments may be in any order + + def add_album_option(self, flags=('-a', '--album')): + """Add a -a/--album option to match albums instead of tracks. + + If used then the format option can auto-detect whether we're setting + the format for items or albums. + Sets the album property on the options extracted from the CLI. + """ + album = optparse.Option(*flags, action='store_true', + help=u'match albums instead of tracks') + self.add_option(album) + self._album_flags = set(flags) + + def _set_format(self, option, opt_str, value, parser, target=None, + fmt=None, store_true=False): + """Internal callback that sets the correct format while parsing CLI + arguments. + """ + if store_true: + setattr(parser.values, option.dest, True) + + # Use the explicitly specified format, or the string from the option. + if fmt: + value = fmt + elif value: + value, = decargs([value]) + else: + value = u'' + + parser.values.format = value + if target: + config[target._format_config_key].set(value) + else: + if self._album_flags: + if parser.values.album: + target = library.Album + else: + # the option is either missing either not parsed yet + if self._album_flags & set(parser.rargs): + target = library.Album + else: + target = library.Item + config[target._format_config_key].set(value) + else: + config[library.Item._format_config_key].set(value) + config[library.Album._format_config_key].set(value) + + def add_path_option(self, flags=('-p', '--path')): + """Add a -p/--path option to display the path instead of the default + format. + + By default this affects both items and albums. If add_album_option() + is used then the target will be autodetected. 
+ + Sets the format property to u'$path' on the options extracted from the + CLI. + """ + path = optparse.Option(*flags, nargs=0, action='callback', + callback=self._set_format, + callback_kwargs={'fmt': u'$path', + 'store_true': True}, + help=u'print paths for matched items or albums') + self.add_option(path) + + def add_format_option(self, flags=('-f', '--format'), target=None): + """Add -f/--format option to print some LibModel instances with a + custom format. + + `target` is optional and can be one of ``library.Item``, 'item', + ``library.Album`` and 'album'. + + Several behaviors are available: + - if `target` is given then the format is only applied to that + LibModel + - if the album option is used then the target will be autodetected + - otherwise the format is applied to both items and albums. + + Sets the format property on the options extracted from the CLI. + """ + kwargs = {} + if target: + if isinstance(target, six.string_types): + target = {'item': library.Item, + 'album': library.Album}[target] + kwargs['target'] = target + + opt = optparse.Option(*flags, action='callback', + callback=self._set_format, + callback_kwargs=kwargs, + help=u'print with custom format') + self.add_option(opt) + + def add_all_common_options(self): + """Add album, path and format options. + """ + self.add_album_option() + self.add_path_option() + self.add_format_option() + + # Subcommand parsing infrastructure. # # This is a fairly generic subcommand parser for optparse. It is @@ -610,10 +906,10 @@ class Subcommand(object): the subcommand; aliases are alternate names. parser is an OptionParser responsible for parsing the subcommand's options. help is a short description of the command. If no parser is - given, it defaults to a new, empty OptionParser. + given, it defaults to a new, empty CommonOptionsParser. 
""" self.name = name - self.parser = parser or optparse.OptionParser() + self.parser = parser or CommonOptionsParser() self.aliases = aliases self.help = help self.hide = hide @@ -632,11 +928,11 @@ class Subcommand(object): @root_parser.setter def root_parser(self, root_parser): self._root_parser = root_parser - self.parser.prog = '{0} {1}'.format(root_parser.get_prog_name(), - self.name) + self.parser.prog = '{0} {1}'.format( + as_string(root_parser.get_prog_name()), self.name) -class SubcommandsOptionParser(optparse.OptionParser): +class SubcommandsOptionParser(CommonOptionsParser): """A variant of OptionParser that parses subcommands and their arguments. """ @@ -648,13 +944,13 @@ class SubcommandsOptionParser(optparse.OptionParser): """ # A more helpful default usage. if 'usage' not in kwargs: - kwargs['usage'] = """ + kwargs['usage'] = u""" %prog COMMAND [ARGS...] %prog help COMMAND""" kwargs['add_help_option'] = False # Super constructor. - optparse.OptionParser.__init__(self, *args, **kwargs) + super(SubcommandsOptionParser, self).__init__(*args, **kwargs) # Our root parser needs to stop on the first unrecognized argument. self.disable_interspersed_args() @@ -671,7 +967,7 @@ class SubcommandsOptionParser(optparse.OptionParser): # Add the list of subcommands to the help message. def format_help(self, formatter=None): # Get the original help message, to which we will append. 
- out = optparse.OptionParser.format_help(self, formatter) + out = super(SubcommandsOptionParser, self).format_help(formatter) if formatter is None: formatter = self.formatter @@ -711,7 +1007,8 @@ class SubcommandsOptionParser(optparse.OptionParser): result.append(name) help_width = formatter.width - help_position help_lines = textwrap.wrap(subcommand.help, help_width) - result.append("%*s%s\n" % (indent_first, "", help_lines[0])) + help_line = help_lines[0] if help_lines else '' + result.append("%*s%s\n" % (indent_first, "", help_line)) result.extend(["%*s%s\n" % (help_position, "", line) for line in help_lines[1:]]) formatter.dedent() @@ -756,7 +1053,7 @@ class SubcommandsOptionParser(optparse.OptionParser): cmdname = args.pop(0) subcommand = self._subcommand_for_name(cmdname) if not subcommand: - raise UserError("unknown command '{0}'".format(cmdname)) + raise UserError(u"unknown command '{0}'".format(cmdname)) suboptions, subargs = subcommand.parse_args(args) return subcommand, suboptions, subargs @@ -765,53 +1062,24 @@ class SubcommandsOptionParser(optparse.OptionParser): optparse.Option.ALWAYS_TYPED_ACTIONS += ('callback',) -def vararg_callback(option, opt_str, value, parser): - """Callback for an option with variable arguments. - Manually collect arguments right of a callback-action - option (ie. with action="callback"), and add the resulting - list to the destination var. 
- - Usage: - parser.add_option("-c", "--callback", dest="vararg_attr", - action="callback", callback=vararg_callback) - - Details: - http://docs.python.org/2/library/optparse.html#callback-example-6-variable - -arguments - """ - value = [value] - - def floatable(str): - try: - float(str) - return True - except ValueError: - return False - - for arg in parser.rargs: - # stop on --foo like options - if arg[:2] == "--" and len(arg) > 2: - break - # stop on -a, but not on -3 or -3.0 - if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): - break - value.append(arg) - - del parser.rargs[:len(value) - 1] - setattr(parser.values, option.dest, value) - - # The main entry point and bootstrapping. def _load_plugins(config): """Load the plugins specified in the configuration. """ - paths = config['pluginpath'].get(confit.StrSeq(split=False)) - paths = map(util.normpath, paths) + paths = config['pluginpath'].as_str_seq(split=False) + paths = [util.normpath(p) for p in paths] + log.debug(u'plugin paths: {0}', util.displayable_path(paths)) + # On Python 3, the search paths need to be unicode. + paths = [util.py3_path(p) for p in paths] + + # Extend the `beetsplug` package to include the plugin paths. import beetsplug beetsplug.__path__ = paths + beetsplug.__path__ - # For backwards compatibility. + + # For backwards compatibility, also support plugin paths that + # *contain* a `beetsplug` package. sys.path += paths plugins.load_plugins(config['plugins'].as_str_seq()) @@ -840,8 +1108,8 @@ def _setup(options, lib=None): if lib is None: lib = _open_library(config) plugins.send("library_opened", lib=lib) - library.Item._types = plugins.types(library.Item) - library.Album._types = plugins.types(library.Album) + library.Item._types.update(plugins.types(library.Item)) + library.Album._types.update(plugins.types(library.Album)) return subcommands, plugins, lib @@ -859,28 +1127,28 @@ def _configure(options): config.set_args(options) # Configure the logger. 
- if config['verbose'].get(bool): - log.setLevel(logging.DEBUG) + if config['verbose'].get(int): + log.set_global_level(logging.DEBUG) else: - log.setLevel(logging.INFO) + log.set_global_level(logging.INFO) config_path = config.user_config_path() if os.path.isfile(config_path): - log.debug(u'user configuration: {0}'.format( - util.displayable_path(config_path))) + log.debug(u'user configuration: {0}', + util.displayable_path(config_path)) else: - log.debug(u'no user configuration found at {0}'.format( - util.displayable_path(config_path))) + log.debug(u'no user configuration found at {0}', + util.displayable_path(config_path)) - log.debug(u'data directory: {0}' - .format(util.displayable_path(config.config_dir()))) + log.debug(u'data directory: {0}', + util.displayable_path(config.config_dir())) return config def _open_library(config): """Create a new library instance from the configuration. """ - dbpath = config['library'].as_filename() + dbpath = util.bytestring_path(config['library'].as_filename()) try: lib = library.Library( dbpath, @@ -890,14 +1158,14 @@ def _open_library(config): ) lib.get_item(0) # Test database connection. except (sqlite3.OperationalError, sqlite3.DatabaseError): - log.debug(traceback.format_exc()) + log.debug(u'{}', traceback.format_exc()) raise UserError(u"database file {0} could not be opened".format( util.displayable_path(dbpath) )) log.debug(u'library database: {0}\n' - u'library directory: {1}' - .format(util.displayable_path(lib.path), - util.displayable_path(lib.directory))) + u'library directory: {1}', + util.displayable_path(lib.path), + util.displayable_path(lib.directory)) return lib @@ -906,16 +1174,18 @@ def _raw_main(args, lib=None): handling. 
""" parser = SubcommandsOptionParser() + parser.add_format_option(flags=('--format-item',), target=library.Item) + parser.add_format_option(flags=('--format-album',), target=library.Album) parser.add_option('-l', '--library', dest='library', - help='library database file to use') + help=u'library database file to use') parser.add_option('-d', '--directory', dest='directory', - help="destination music directory") - parser.add_option('-v', '--verbose', dest='verbose', action='store_true', - help='print debugging information') + help=u"destination music directory") + parser.add_option('-v', '--verbose', dest='verbose', action='count', + help=u'log more details (use twice for even more)') parser.add_option('-c', '--config', dest='config', - help='path to configuration file') + help=u'path to configuration file') parser.add_option('-h', '--help', dest='help', action='store_true', - help='how this help message and exit') + help=u'show this help message and exit') parser.add_option('--version', dest='version', action='store_true', help=optparse.SUPPRESS_HELP) @@ -924,10 +1194,12 @@ def _raw_main(args, lib=None): # Special case for the `config --edit` command: bypass _setup so # that an invalid configuration does not prevent the editor from # starting. - if subargs[0] == 'config' and ('-e' in subargs or '--edit' in subargs): + if subargs and subargs[0] == 'config' \ + and ('-e' in subargs or '--edit' in subargs): from beets.ui.commands import config_edit return config_edit() + test_lib = bool(lib) subcommands, plugins, lib = _setup(options, lib) parser.add_subcommand(*subcommands) @@ -935,6 +1207,9 @@ def _raw_main(args, lib=None): subcommand.func(lib, suboptions, subargs) plugins.send('cli_exit', lib=lib) + if not test_lib: + # Clean up the library unless it came from the test harness. 
+ lib._close() def main(args=None): @@ -945,7 +1220,7 @@ def main(args=None): _raw_main(args) except UserError as exc: message = exc.args[0] if exc.args else None - log.error(u'error: {0}'.format(message)) + log.error(u'error: {0}', message) sys.exit(1) except util.HumanReadableException as exc: exc.log(log) @@ -953,11 +1228,14 @@ def main(args=None): except library.FileOperationError as exc: # These errors have reasonable human-readable descriptions, but # we still want to log their tracebacks for debugging. - log.debug(traceback.format_exc()) - log.error(exc) + log.debug('{}', traceback.format_exc()) + log.error('{}', exc) sys.exit(1) except confit.ConfigError as exc: - log.error(u'configuration error: {0}'.format(exc)) + log.error(u'configuration error: {0}', exc) + sys.exit(1) + except db_query.InvalidQueryError as exc: + log.error(u'invalid query: {0}', exc) sys.exit(1) except IOError as exc: if exc.errno == errno.EPIPE: @@ -967,4 +1245,4 @@ def main(args=None): raise except KeyboardInterrupt: # Silently ignore ^C except in verbose mode. - log.debug(traceback.format_exc()) + log.debug(u'{}', traceback.format_exc()) diff --git a/lib/beets/ui/commands.py b/lib/beets/ui/commands.py old mode 100644 new mode 100755 index 4dfac11c..8a07f614 --- a/lib/beets/ui/commands.py +++ b/lib/beets/ui/commands.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -15,19 +16,18 @@ """This module provides the default commands for beets' command-line interface. 
""" -from __future__ import print_function -import logging +from __future__ import division, absolute_import, print_function + import os -import time -import codecs -import platform import re -import shlex +from platform import python_version +from collections import namedtuple, Counter +from itertools import chain import beets from beets import ui -from beets.ui import print_, input_, decargs +from beets.ui import print_, input_, decargs, show_path_changes from beets import autotag from beets.autotag import Recommendation from beets.autotag import hooks @@ -35,12 +35,14 @@ from beets import plugins from beets import importer from beets import util from beets.util import syspath, normpath, ancestry, displayable_path -from beets.util.functemplate import Template from beets import library from beets import config +from beets import logging from beets.util.confit import _package_path +import six VARIOUS_ARTISTS = u'Various Artists' +PromptChoice = namedtuple('PromptChoice', ['short', 'long', 'callback']) # Global logger. log = logging.getLogger('beets') @@ -71,43 +73,47 @@ def _do_query(lib, query, album, also_items=True): items = list(lib.items(query)) if album and not albums: - raise ui.UserError('No matching albums found.') + raise ui.UserError(u'No matching albums found.') elif not album and not items: - raise ui.UserError('No matching items found.') + raise ui.UserError(u'No matching items found.') return items, albums # fields: Shows a list of available fields for queries and format strings. +def _print_keys(query): + """Given a SQLite query result, print the `key` field of each + returned row, with identation of 2 spaces. 
+ """ + for row in query: + print_(u' ' * 2 + row['key']) + + def fields_func(lib, opts, args): def _print_rows(names): - print(" " + "\n ".join(names)) + names.sort() + print_(u' ' + u'\n '.join(names)) - def _show_plugin_fields(album): - plugin_fields = [] - for plugin in plugins.find_plugins(): - if album: - fdict = plugin.album_template_fields - else: - fdict = plugin.template_fields - plugin_fields += fdict.keys() - if plugin_fields: - print("Template fields from plugins:") - _print_rows(plugin_fields) + print_(u"Item fields:") + _print_rows(library.Item.all_keys()) - print("Item fields:") - _print_rows(library.Item._fields.keys()) - _show_plugin_fields(False) + print_(u"Album fields:") + _print_rows(library.Album.all_keys()) - print("\nAlbum fields:") - _print_rows(library.Album._fields.keys()) - _show_plugin_fields(True) + with lib.transaction() as tx: + # The SQL uses the DISTINCT to get unique values from the query + unique_fields = 'SELECT DISTINCT key FROM (%s)' + print_(u"Item flexible attributes:") + _print_keys(tx.query(unique_fields % library.Item._flex_table)) + + print_(u"Album flexible attributes:") + _print_keys(tx.query(unique_fields % library.Album._flex_table)) fields_cmd = ui.Subcommand( 'fields', - help='show fields available for queries and format strings' + help=u'show fields available for queries and format strings' ) fields_cmd.func = fields_func default_commands.append(fields_cmd) @@ -120,7 +126,7 @@ class HelpCommand(ui.Subcommand): def __init__(self): super(HelpCommand, self).__init__( 'help', aliases=('?',), - help='give detailed help on a specific sub-command', + help=u'give detailed help on a specific sub-command', ) def func(self, lib, opts, args): @@ -128,7 +134,7 @@ class HelpCommand(ui.Subcommand): cmdname = args[0] helpcommand = self.root_parser._subcommand_for_name(cmdname) if not helpcommand: - raise ui.UserError("unknown command '{0}'".format(cmdname)) + raise ui.UserError(u"unknown command '{0}'".format(cmdname)) 
helpcommand.print_help() else: self.root_parser.print_help() @@ -152,14 +158,14 @@ def disambig_string(info): if isinstance(info, hooks.AlbumInfo): if info.media: - if info.mediums > 1: + if info.mediums and info.mediums > 1: disambig.append(u'{0}x{1}'.format( info.mediums, info.media )) else: disambig.append(info.media) if info.year: - disambig.append(unicode(info.year)) + disambig.append(six.text_type(info.year)) if info.country: disambig.append(info.country) if info.label: @@ -175,13 +181,13 @@ def dist_string(dist): """Formats a distance (a float) as a colorized similarity percentage string. """ - out = '%.1f%%' % ((1 - dist) * 100) + out = u'%.1f%%' % ((1 - dist) * 100) if dist <= config['match']['strong_rec_thresh'].as_number(): - out = ui.colorize('green', out) + out = ui.colorize('text_success', out) elif dist <= config['match']['medium_rec_thresh'].as_number(): - out = ui.colorize('yellow', out) + out = ui.colorize('text_warning', out) else: - out = ui.colorize('red', out) + out = ui.colorize('text_error', out) return out @@ -198,7 +204,7 @@ def penalty_string(distance, limit=None): if penalties: if limit and len(penalties) > limit: penalties = penalties[:limit] + ['...'] - return ui.colorize('yellow', '(%s)' % ', '.join(penalties)) + return ui.colorize('text_warning', u'(%s)' % ', '.join(penalties)) def show_change(cur_artist, cur_album, match): @@ -232,9 +238,9 @@ def show_change(cur_artist, cur_album, match): if mediums > 1: return u'{0}-{1}'.format(medium, medium_index) else: - return unicode(medium_index) + return six.text_type(medium_index or index) else: - return unicode(index) + return six.text_type(index) # Identify the album in question. 
if cur_artist != match.info.artist or \ @@ -249,21 +255,21 @@ def show_change(cur_artist, cur_album, match): artist_l, artist_r = ui.colordiff(artist_l, artist_r) album_l, album_r = ui.colordiff(album_l, album_r) - print_("Correcting tags from:") + print_(u"Correcting tags from:") show_album(artist_l, album_l) - print_("To:") + print_(u"To:") show_album(artist_r, album_r) else: print_(u"Tagging:\n {0.artist} - {0.album}".format(match.info)) # Data URL. if match.info.data_url: - print_('URL:\n %s' % match.info.data_url) + print_(u'URL:\n %s' % match.info.data_url) # Info line. info = [] # Similarity. - info.append('(Similarity: %s)' % dist_string(match.distance)) + info.append(u'(Similarity: %s)' % dist_string(match.distance)) # Penalties. penalties = penalty_string(match.distance) if penalties: @@ -271,12 +277,12 @@ def show_change(cur_artist, cur_album, match): # Disambiguation. disambig = disambig_string(match.info) if disambig: - info.append(ui.colorize('lightgray', '(%s)' % disambig)) + info.append(ui.colorize('text_highlight_minor', u'(%s)' % disambig)) print_(' '.join(info)) # Tracks. - pairs = match.mapping.items() - pairs.sort(key=lambda (_, track_info): track_info.index) + pairs = list(match.mapping.items()) + pairs.sort(key=lambda item_and_track_info: item_and_track_info[1].index) # Build up LHS and RHS for track difference display. 
The `lines` list # contains ``(lhs, rhs, width)`` tuples where `width` is the length (in @@ -289,16 +295,16 @@ def show_change(cur_artist, cur_album, match): if medium != track_info.medium or disctitle != track_info.disctitle: media = match.info.media or 'Media' if match.info.mediums > 1 and track_info.disctitle: - lhs = '%s %s: %s' % (media, track_info.medium, - track_info.disctitle) + lhs = u'%s %s: %s' % (media, track_info.medium, + track_info.disctitle) elif match.info.mediums > 1: - lhs = '%s %s' % (media, track_info.medium) + lhs = u'%s %s' % (media, track_info.medium) elif track_info.disctitle: - lhs = '%s: %s' % (media, track_info.disctitle) + lhs = u'%s: %s' % (media, track_info.disctitle) else: lhs = None if lhs: - lines.append((lhs, '', 0)) + lines.append((lhs, u'', 0)) medium, disctitle = track_info.medium, track_info.disctitle # Titles. @@ -316,9 +322,9 @@ def show_change(cur_artist, cur_album, match): cur_track, new_track = format_index(item), format_index(track_info) if cur_track != new_track: if item.track in (track_info.index, track_info.medium_index): - color = 'lightgray' + color = 'text_highlight_minor' else: - color = 'red' + color = 'text_highlight' templ = ui.colorize(color, u' (#{0})') lhs += templ.format(cur_track) rhs += templ.format(new_track) @@ -330,7 +336,7 @@ def show_change(cur_artist, cur_album, match): config['ui']['length_diff_thresh'].as_number(): cur_length = ui.human_seconds_short(item.length) new_length = ui.human_seconds_short(track_info.length) - templ = ui.colorize('red', u' ({0})') + templ = ui.colorize('text_highlight', u' ({0})') lhs += templ.format(cur_length) rhs += templ.format(new_length) lhs_width += len(cur_length) + 3 @@ -341,9 +347,9 @@ def show_change(cur_artist, cur_album, match): rhs += ' %s' % penalties if lhs != rhs: - lines.append((' * %s' % lhs, rhs, lhs_width)) + lines.append((u' * %s' % lhs, rhs, lhs_width)) elif config['import']['detail']: - lines.append((' * %s' % lhs, '', lhs_width)) + 
lines.append((u' * %s' % lhs, '', lhs_width)) # Print each track in two columns, or across two lines. col_width = (ui.term_width() - len(''.join([' * ', ' -> ']))) // 2 @@ -360,19 +366,23 @@ def show_change(cur_artist, cur_album, match): # Missing and unmatched tracks. if match.extra_tracks: - print_('Missing tracks:') + print_(u'Missing tracks ({0}/{1} - {2:.1%}):'.format( + len(match.extra_tracks), + len(match.info.tracks), + len(match.extra_tracks) / len(match.info.tracks) + )) for track_info in match.extra_tracks: - line = ' ! %s (#%s)' % (track_info.title, format_index(track_info)) + line = u' ! %s (#%s)' % (track_info.title, format_index(track_info)) if track_info.length: - line += ' (%s)' % ui.human_seconds_short(track_info.length) - print_(ui.colorize('yellow', line)) + line += u' (%s)' % ui.human_seconds_short(track_info.length) + print_(ui.colorize('text_warning', line)) if match.extra_items: - print_('Unmatched tracks:') + print_(u'Unmatched tracks ({0}):'.format(len(match.extra_items))) for item in match.extra_items: - line = ' ! %s (#%s)' % (item.title, format_index(item)) + line = u' ! 
%s (#%s)' % (item.title, format_index(item)) if item.length: - line += ' (%s)' % ui.human_seconds_short(item.length) - print_(ui.colorize('yellow', line)) + line += u' (%s)' % ui.human_seconds_short(item.length) + print_(ui.colorize('text_warning', line)) def show_item_change(item, match): @@ -386,22 +396,22 @@ def show_item_change(item, match): cur_artist, new_artist = ui.colordiff(cur_artist, new_artist) cur_title, new_title = ui.colordiff(cur_title, new_title) - print_("Correcting track tags from:") - print_(" %s - %s" % (cur_artist, cur_title)) - print_("To:") - print_(" %s - %s" % (new_artist, new_title)) + print_(u"Correcting track tags from:") + print_(u" %s - %s" % (cur_artist, cur_title)) + print_(u"To:") + print_(u" %s - %s" % (new_artist, new_title)) else: - print_("Tagging track: %s - %s" % (cur_artist, cur_title)) + print_(u"Tagging track: %s - %s" % (cur_artist, cur_title)) # Data URL. if match.info.data_url: - print_('URL:\n %s' % match.info.data_url) + print_(u'URL:\n %s' % match.info.data_url) # Info line. info = [] # Similarity. - info.append('(Similarity: %s)' % dist_string(match.distance)) + info.append(u'(Similarity: %s)' % dist_string(match.distance)) # Penalties. penalties = penalty_string(match.distance) if penalties: @@ -409,7 +419,7 @@ def show_item_change(item, match): # Disambiguation. disambig = disambig_string(match.info) if disambig: - info.append(ui.colorize('lightgray', '(%s)' % disambig)) + info.append(ui.colorize('text_highlight_minor', u'(%s)' % disambig)) print_(' '.join(info)) @@ -423,7 +433,7 @@ def summarize_items(items, singleton): """ summary_parts = [] if not singleton: - summary_parts.append("{0} items".format(len(items))) + summary_parts.append(u"{0} items".format(len(items))) format_counts = {} for item in items: @@ -432,19 +442,25 @@ def summarize_items(items, singleton): # A single format. summary_parts.append(items[0].format) else: - # Enumerate all the formats. 
- for format, count in format_counts.iteritems(): - summary_parts.append('{0} {1}'.format(format, count)) + # Enumerate all the formats by decreasing frequencies: + for fmt, count in sorted( + format_counts.items(), + key=lambda fmt_and_count: (-fmt_and_count[1], fmt_and_count[0]) + ): + summary_parts.append('{0} {1}'.format(fmt, count)) - average_bitrate = sum([item.bitrate for item in items]) / len(items) - total_duration = sum([item.length for item in items]) - summary_parts.append('{0}kbps'.format(int(average_bitrate / 1000))) - summary_parts.append(ui.human_seconds_short(total_duration)) + if items: + average_bitrate = sum([item.bitrate for item in items]) / len(items) + total_duration = sum([item.length for item in items]) + total_filesize = sum([item.filesize for item in items]) + summary_parts.append(u'{0}kbps'.format(int(average_bitrate / 1000))) + summary_parts.append(ui.human_seconds_short(total_duration)) + summary_parts.append(ui.human_bytes(total_filesize)) - return ', '.join(summary_parts) + return u', '.join(summary_parts) -def _summary_judment(rec): +def _summary_judgment(rec): """Determines whether a decision should be made without even asking the user. This occurs in quiet mode and when an action is chosen for NONE recommendations. Return an action or None if the user should be @@ -471,14 +487,15 @@ def _summary_judment(rec): return None if action == importer.action.SKIP: - print_('Skipping.') + print_(u'Skipping.') elif action == importer.action.ASIS: - print_('Importing as-is.') + print_(u'Importing as-is.') return action def choose_candidate(candidates, singleton, rec, cur_artist=None, - cur_album=None, item=None, itemcount=None): + cur_album=None, item=None, itemcount=None, + choices=[]): """Given a sorted list of candidates, ask the user for a selection of which candidate to use. Applies to both full albums and singletons (tracks). 
Candidates are either AlbumMatch or TrackMatch @@ -486,8 +503,12 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, `cur_album`, and `itemcount` must be provided. For singletons, `item` must be provided. - Returns the result of the choice, which may SKIP, ASIS, TRACKS, or - MANUAL or a candidate (an AlbumMatch/TrackMatch object). + `choices` is a list of `PromptChoice`s to be used in each prompt. + + Returns one of the following: + * the result of the choice, which may be SKIP or ASIS + * a candidate (an AlbumMatch/TrackMatch object) + * a chosen `PromptChoice` from `choices` """ # Sanity check. if singleton: @@ -496,35 +517,22 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, assert cur_artist is not None assert cur_album is not None + # Build helper variables for the prompt choices. + choice_opts = tuple(c.long for c in choices) + choice_actions = {c.short: c for c in choices} + # Zero candidates. if not candidates: if singleton: - print_("No matching recordings found.") - opts = ('Use as-is', 'Skip', 'Enter search', 'enter Id', - 'aBort') + print_(u"No matching recordings found.") else: - print_("No matching release found for {0} tracks." + print_(u"No matching release found for {0} tracks." 
.format(itemcount)) - print_('For help, see: ' - 'http://beets.readthedocs.org/en/latest/faq.html#nomatch') - opts = ('Use as-is', 'as Tracks', 'Group albums', 'Skip', - 'Enter search', 'enter Id', 'aBort') - sel = ui.input_options(opts) - if sel == 'u': - return importer.action.ASIS - elif sel == 't': - assert not singleton - return importer.action.TRACKS - elif sel == 'e': - return importer.action.MANUAL - elif sel == 's': - return importer.action.SKIP - elif sel == 'b': - raise importer.ImportAbort() - elif sel == 'i': - return importer.action.MANUAL_ID - elif sel == 'g': - return importer.action.ALBUMS + print_(u'For help, see: ' + u'http://beets.readthedocs.org/en/latest/faq.html#nomatch') + sel = ui.input_options(choice_opts) + if sel in choice_actions: + return choice_actions[sel] else: assert False @@ -566,35 +574,18 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Disambiguation disambig = disambig_string(match.info) if disambig: - line.append(ui.colorize('lightgray', '(%s)' % disambig)) + line.append(ui.colorize('text_highlight_minor', + u'(%s)' % disambig)) - print_(' '.join(line)) + print_(u' '.join(line)) # Ask the user for a choice. 
- if singleton: - opts = ('Skip', 'Use as-is', 'Enter search', 'enter Id', - 'aBort') - else: - opts = ('Skip', 'Use as-is', 'as Tracks', 'Group albums', - 'Enter search', 'enter Id', 'aBort') - sel = ui.input_options(opts, numrange=(1, len(candidates))) - if sel == 's': - return importer.action.SKIP - elif sel == 'u': - return importer.action.ASIS - elif sel == 'm': + sel = ui.input_options(choice_opts, + numrange=(1, len(candidates))) + if sel == u'm': pass - elif sel == 'e': - return importer.action.MANUAL - elif sel == 't': - assert not singleton - return importer.action.TRACKS - elif sel == 'b': - raise importer.ImportAbort() - elif sel == 'i': - return importer.action.MANUAL_ID - elif sel == 'g': - return importer.action.ALBUMS + elif sel in choice_actions: + return choice_actions[sel] else: # Numerical selection. match = candidates[sel - 1] if sel != 1: @@ -614,55 +605,62 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, return match # Ask for confirmation. - if singleton: - opts = ('Apply', 'More candidates', 'Skip', 'Use as-is', - 'Enter search', 'enter Id', 'aBort') - else: - opts = ('Apply', 'More candidates', 'Skip', 'Use as-is', - 'as Tracks', 'Group albums', 'Enter search', 'enter Id', - 'aBort') default = config['import']['default_action'].as_choice({ - 'apply': 'a', - 'skip': 's', - 'asis': 'u', - 'none': None, + u'apply': u'a', + u'skip': u's', + u'asis': u'u', + u'none': None, }) if default is None: require = True - sel = ui.input_options(opts, require=require, default=default) - if sel == 'a': + sel = ui.input_options((u'Apply', u'More candidates') + choice_opts, + require=require, default=default) + if sel == u'a': return match - elif sel == 'g': - return importer.action.ALBUMS - elif sel == 's': - return importer.action.SKIP - elif sel == 'u': - return importer.action.ASIS - elif sel == 't': - assert not singleton - return importer.action.TRACKS - elif sel == 'e': - return importer.action.MANUAL - elif sel == 'b': - raise 
importer.ImportAbort() - elif sel == 'i': - return importer.action.MANUAL_ID + elif sel in choice_actions: + return choice_actions[sel] -def manual_search(singleton): - """Input either an artist and album (for full albums) or artist and +def manual_search(session, task): + """Get a new `Proposal` using manual search criteria. + + Input either an artist and album (for full albums) or artist and track name (for singletons) for manual search. """ - artist = input_('Artist:') - name = input_('Track:' if singleton else 'Album:') - return artist.strip(), name.strip() + artist = input_(u'Artist:').strip() + name = input_(u'Album:' if task.is_album else u'Track:').strip() + + if task.is_album: + _, _, prop = autotag.tag_album( + task.items, artist, name + ) + return prop + else: + return autotag.tag_item(task.item, artist, name) -def manual_id(singleton): - """Input an ID, either for an album ("release") or a track ("recording"). +def manual_id(session, task): + """Get a new `Proposal` using a manually-entered ID. + + Input an ID, either for an album ("release") or a track ("recording"). """ - prompt = u'Enter {0} ID:'.format('recording' if singleton else 'release') - return input_(prompt).strip() + prompt = u'Enter {0} ID:'.format(u'release' if task.is_album + else u'recording') + search_id = input_(prompt).strip() + + if task.is_album: + _, _, prop = autotag.tag_album( + task.items, search_ids=search_id.split() + ) + return prop + else: + return autotag.tag_item(task.item, search_ids=search_id.split()) + + +def abort_action(session, task): + """A prompt choice callback that aborts the importer. + """ + raise importer.ImportAbort() class TerminalImportSession(importer.ImportSession): @@ -679,7 +677,7 @@ class TerminalImportSession(importer.ImportSession): u' ({0} items)'.format(len(task.items))) # Take immediate action if appropriate. 
- action = _summary_judment(task.rec) + action = _summary_judgment(task.rec) if action == importer.action.APPLY: match = task.candidates[0] show_change(task.cur_artist, task.cur_album, match) @@ -690,30 +688,33 @@ class TerminalImportSession(importer.ImportSession): # Loop until we have a choice. candidates, rec = task.candidates, task.rec while True: - # Ask for a choice from the user. + # Ask for a choice from the user. The result of + # `choose_candidate` may be an `importer.action`, an + # `AlbumMatch` object for a specific selection, or a + # `PromptChoice`. + choices = self._get_choices(task) choice = choose_candidate( candidates, False, rec, task.cur_artist, task.cur_album, - itemcount=len(task.items) + itemcount=len(task.items), choices=choices ) - # Choose which tags to use. - if choice in (importer.action.SKIP, importer.action.ASIS, - importer.action.TRACKS, importer.action.ALBUMS): + # Basic choices that require no more action here. + if choice in (importer.action.SKIP, importer.action.ASIS): # Pass selection to main control flow. return choice - elif choice is importer.action.MANUAL: - # Try again with manual search terms. - search_artist, search_album = manual_search(False) - _, _, candidates, rec = autotag.tag_album( - task.items, search_artist, search_album - ) - elif choice is importer.action.MANUAL_ID: - # Try a manually-entered ID. - search_id = manual_id(False) - if search_id: - _, _, candidates, rec = autotag.tag_album( - task.items, search_id=search_id - ) + + # Plugin-provided choices. We invoke the associated callback + # function. + elif choice in choices: + post_choice = choice.callback(self, task) + if isinstance(post_choice, importer.action): + return post_choice + elif isinstance(post_choice, autotag.Proposal): + # Use the new candidates and continue around the loop. + candidates = post_choice.candidates + rec = post_choice.recommendation + + # Otherwise, we have a specific match selection. else: # We have a candidate! Finish tagging. 
Here, choice is an # AlbumMatch object. @@ -725,11 +726,11 @@ class TerminalImportSession(importer.ImportSession): either an action constant or a TrackMatch object. """ print_() - print_(task.item.path) + print_(displayable_path(task.item.path)) candidates, rec = task.candidates, task.rec # Take immediate action if appropriate. - action = _summary_judment(task.rec) + action = _summary_judgment(task.rec) if action == importer.action.APPLY: match = candidates[0] show_item_change(task.item, match) @@ -739,23 +740,21 @@ class TerminalImportSession(importer.ImportSession): while True: # Ask for a choice. - choice = choose_candidate(candidates, True, rec, item=task.item) + choices = self._get_choices(task) + choice = choose_candidate(candidates, True, rec, item=task.item, + choices=choices) if choice in (importer.action.SKIP, importer.action.ASIS): return choice - elif choice == importer.action.TRACKS: - assert False # TRACKS is only legal for albums. - elif choice == importer.action.MANUAL: - # Continue in the loop with a new set of candidates. - search_artist, search_title = manual_search(True) - candidates, rec = autotag.tag_item(task.item, search_artist, - search_title) - elif choice == importer.action.MANUAL_ID: - # Ask for a track ID. - search_id = manual_id(True) - if search_id: - candidates, rec = autotag.tag_item(task.item, - search_id=search_id) + + elif choice in choices: + post_choice = choice.callback(self, task) + if isinstance(post_choice, importer.action): + return post_choice + elif isinstance(post_choice, autotag.Proposal): + candidates = post_choice.candidates + rec = post_choice.recommendation + else: # Chose a candidate. assert isinstance(choice, autotag.TrackMatch) @@ -765,37 +764,38 @@ class TerminalImportSession(importer.ImportSession): """Decide what to do when a new album or item seems similar to one that's already in the library. """ - log.warn(u"This {0} is already in the library!" 
- .format("album" if task.is_album else "item")) + log.warning(u"This {0} is already in the library!", + (u"album" if task.is_album else u"item")) if config['import']['quiet']: # In quiet mode, don't prompt -- just skip. log.info(u'Skipping.') - sel = 's' + sel = u's' else: # Print some detail about the existing and new items so the # user can make an informed decision. for duplicate in found_duplicates: - print("Old: " + summarize_items( + print_(u"Old: " + summarize_items( list(duplicate.items()) if task.is_album else [duplicate], not task.is_album, )) - print("New: " + summarize_items( + + print_(u"New: " + summarize_items( task.imported_items(), not task.is_album, )) sel = ui.input_options( - ('Skip new', 'Keep both', 'Remove old') + (u'Skip new', u'Keep both', u'Remove old') ) - if sel == 's': + if sel == u's': # Skip new. task.set_choice(importer.action.SKIP) - elif sel == 'k': + elif sel == u'k': # Keep both. Do nothing; leave the choice intact. pass - elif sel == 'r': + elif sel == u'r': # Remove old. task.should_remove_duplicates = True else: @@ -803,9 +803,73 @@ class TerminalImportSession(importer.ImportSession): def should_resume(self, path): return ui.input_yn(u"Import of the directory:\n{0}\n" - "was interrupted. Resume (Y/n)?" + u"was interrupted. Resume (Y/n)?" .format(displayable_path(path))) + def _get_choices(self, task): + """Get the list of prompt choices that should be presented to the + user. This consists of both built-in choices and ones provided by + plugins. + + The `before_choose_candidate` event is sent to the plugins, with + session and task as its parameters. Plugins are responsible for + checking the right conditions and returning a list of `PromptChoice`s, + which is flattened and checked for conflicts. + + If two or more choices have the same short letter, a warning is + emitted and all but one choices are discarded, giving preference + to the default importer choices. + + Returns a list of `PromptChoice`s. 
+ """ + # Standard, built-in choices. + choices = [ + PromptChoice(u's', u'Skip', + lambda s, t: importer.action.SKIP), + PromptChoice(u'u', u'Use as-is', + lambda s, t: importer.action.ASIS) + ] + if task.is_album: + choices += [ + PromptChoice(u't', u'as Tracks', + lambda s, t: importer.action.TRACKS), + PromptChoice(u'g', u'Group albums', + lambda s, t: importer.action.ALBUMS), + ] + choices += [ + PromptChoice(u'e', u'Enter search', manual_search), + PromptChoice(u'i', u'enter Id', manual_id), + PromptChoice(u'b', u'aBort', abort_action), + ] + + # Send the before_choose_candidate event and flatten list. + extra_choices = list(chain(*plugins.send('before_choose_candidate', + session=self, task=task))) + + # Add a "dummy" choice for the other baked-in option, for + # duplicate checking. + all_choices = [ + PromptChoice(u'a', u'Apply', None), + ] + choices + extra_choices + + # Check for conflicts. + short_letters = [c.short for c in all_choices] + if len(short_letters) != len(set(short_letters)): + # Duplicate short letter has been found. + duplicates = [i for i, count in Counter(short_letters).items() + if count > 1] + for short in duplicates: + # Keep the first of the choices, removing the rest. + dup_choices = [c for c in all_choices if c.short == short] + for c in dup_choices[1:]: + log.warning(u"Prompt choice '{0}' removed due to conflict " + u"with '{1}' (short letter: '{2}')", + c.long, dup_choices[0].long, c.short) + extra_choices.remove(c) + + return choices + extra_choices + + # The import command. @@ -821,33 +885,26 @@ def import_files(lib, paths, query): # Check parameter consistency. if config['import']['quiet'] and config['import']['timid']: - raise ui.UserError("can't be both quiet and timid") + raise ui.UserError(u"can't be both quiet and timid") # Open the log. 
if config['import']['log'].get() is not None: - logpath = config['import']['log'].as_filename() + logpath = syspath(config['import']['log'].as_filename()) try: - logfile = codecs.open(syspath(logpath), 'a', 'utf8') + loghandler = logging.FileHandler(logpath) except IOError: - raise ui.UserError(u"could not open log file for writing: %s" % - displayable_path(logpath)) - print(u'import started', time.asctime(), file=logfile) + raise ui.UserError(u"could not open log file for writing: " + u"{0}".format(displayable_path(logpath))) else: - logfile = None + loghandler = None # Never ask for input in quiet mode. if config['import']['resume'].get() == 'ask' and \ config['import']['quiet']: config['import']['resume'] = False - session = TerminalImportSession(lib, logfile, paths, query) - try: - session.run() - finally: - # If we were logging, close the file. - if logfile: - print(u'', file=logfile) - logfile.close() + session = TerminalImportSession(lib, loghandler, paths, query) + session.run() # Emit event. 
plugins.send('import', lib=lib, paths=paths) @@ -868,85 +925,94 @@ def import_func(lib, opts, args): query = None paths = args if not paths: - raise ui.UserError('no path specified') + raise ui.UserError(u'no path specified') import_files(lib, paths, query) import_cmd = ui.Subcommand( - 'import', help='import new music', aliases=('imp', 'im') + u'import', help=u'import new music', aliases=(u'imp', u'im') ) import_cmd.parser.add_option( - '-c', '--copy', action='store_true', default=None, - help="copy tracks into library directory (default)" + u'-c', u'--copy', action='store_true', default=None, + help=u"copy tracks into library directory (default)" ) import_cmd.parser.add_option( - '-C', '--nocopy', action='store_false', dest='copy', - help="don't copy tracks (opposite of -c)" + u'-C', u'--nocopy', action='store_false', dest='copy', + help=u"don't copy tracks (opposite of -c)" ) import_cmd.parser.add_option( - '-w', '--write', action='store_true', default=None, - help="write new metadata to files' tags (default)" + u'-m', u'--move', action='store_true', dest='move', + help=u"move tracks into the library (overrides -c)" ) import_cmd.parser.add_option( - '-W', '--nowrite', action='store_false', dest='write', - help="don't write metadata (opposite of -w)" + u'-w', u'--write', action='store_true', default=None, + help=u"write new metadata to files' tags (default)" ) import_cmd.parser.add_option( - '-a', '--autotag', action='store_true', dest='autotag', - help="infer tags for imported files (default)" + u'-W', u'--nowrite', action='store_false', dest='write', + help=u"don't write metadata (opposite of -w)" ) import_cmd.parser.add_option( - '-A', '--noautotag', action='store_false', dest='autotag', - help="don't infer tags for imported files (opposite of -a)" + u'-a', u'--autotag', action='store_true', dest='autotag', + help=u"infer tags for imported files (default)" ) import_cmd.parser.add_option( - '-p', '--resume', action='store_true', default=None, - help="resume 
importing if interrupted" + u'-A', u'--noautotag', action='store_false', dest='autotag', + help=u"don't infer tags for imported files (opposite of -a)" ) import_cmd.parser.add_option( - '-P', '--noresume', action='store_false', dest='resume', - help="do not try to resume importing" + u'-p', u'--resume', action='store_true', default=None, + help=u"resume importing if interrupted" ) import_cmd.parser.add_option( - '-q', '--quiet', action='store_true', dest='quiet', - help="never prompt for input: skip albums instead" + u'-P', u'--noresume', action='store_false', dest='resume', + help=u"do not try to resume importing" ) import_cmd.parser.add_option( - '-l', '--log', dest='log', - help='file to log untaggable albums for later review' + u'-q', u'--quiet', action='store_true', dest='quiet', + help=u"never prompt for input: skip albums instead" ) import_cmd.parser.add_option( - '-s', '--singletons', action='store_true', - help='import individual tracks instead of full albums' + u'-l', u'--log', dest='log', + help=u'file to log untaggable albums for later review' ) import_cmd.parser.add_option( - '-t', '--timid', dest='timid', action='store_true', - help='always confirm all actions' + u'-s', u'--singletons', action='store_true', + help=u'import individual tracks instead of full albums' ) import_cmd.parser.add_option( - '-L', '--library', dest='library', action='store_true', - help='retag items matching a query' + u'-t', u'--timid', dest='timid', action='store_true', + help=u'always confirm all actions' ) import_cmd.parser.add_option( - '-i', '--incremental', dest='incremental', action='store_true', - help='skip already-imported directories' + u'-L', u'--library', dest='library', action='store_true', + help=u'retag items matching a query' ) import_cmd.parser.add_option( - '-I', '--noincremental', dest='incremental', action='store_false', - help='do not skip already-imported directories' + u'-i', u'--incremental', dest='incremental', action='store_true', + help=u'skip 
already-imported directories' ) import_cmd.parser.add_option( - '--flat', dest='flat', action='store_true', - help='import an entire tree as a single album' + u'-I', u'--noincremental', dest='incremental', action='store_false', + help=u'do not skip already-imported directories' ) import_cmd.parser.add_option( - '-g', '--group-albums', dest='group_albums', action='store_true', - help='group tracks in a folder into separate albums' + u'--flat', dest='flat', action='store_true', + help=u'import an entire tree as a single album' ) import_cmd.parser.add_option( - '--pretend', dest='pretend', action='store_true', - help='just print the files to import' + u'-g', u'--group-albums', dest='group_albums', action='store_true', + help=u'group tracks in a folder into separate albums' +) +import_cmd.parser.add_option( + u'--pretend', dest='pretend', action='store_true', + help=u'just print the files to import' +) +import_cmd.parser.add_option( + u'-S', u'--search-id', dest='search_ids', action='append', + metavar='ID', + help=u'restrict matching to a specific metadata backend ID' ) import_cmd.func = import_func default_commands.append(import_cmd) @@ -954,51 +1020,44 @@ default_commands.append(import_cmd) # list: Query and show library contents. -def list_items(lib, query, album, fmt): +def list_items(lib, query, album, fmt=u''): """Print out items in lib matching query. If album, then search for albums instead of single items. 
""" - tmpl = Template(ui._pick_format(album, fmt)) if album: for album in lib.albums(query): - ui.print_obj(album, lib, tmpl) + ui.print_(format(album, fmt)) else: for item in lib.items(query): - ui.print_obj(item, lib, tmpl) + ui.print_(format(item, fmt)) def list_func(lib, opts, args): - if opts.path: - fmt = '$path' - else: - fmt = opts.format - list_items(lib, decargs(args), opts.album, fmt) + list_items(lib, decargs(args), opts.album) -list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) -list_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='show matching albums instead of tracks' -) -list_cmd.parser.add_option( - '-p', '--path', action='store_true', - help='print paths for matched items or albums' -) -list_cmd.parser.add_option( - '-f', '--format', action='store', - help='print with custom format', default=None -) +list_cmd = ui.Subcommand(u'list', help=u'query the library', aliases=(u'ls',)) +list_cmd.parser.usage += u"\n" \ + u'Example: %prog -f \'$album: $title\' artist:beatles' +list_cmd.parser.add_all_common_options() list_cmd.func = list_func default_commands.append(list_cmd) # update: Update library contents according to on-disk tags. -def update_items(lib, query, album, move, pretend): +def update_items(lib, query, album, move, pretend, fields): """For all the items matched by the query, update the library to reflect the item's embedded tags. + :param fields: The fields to be stored. If not specified, all fields will + be. """ with lib.transaction(): + if move and fields is not None and 'path' not in fields: + # Special case: if an item needs to be moved, the path field has to + # updated; otherwise the new path will not be reflected in the + # database. + fields.append('path') items, _ = _do_query(lib, query, album) # Walk through the items and pick up their changes. @@ -1006,8 +1065,8 @@ def update_items(lib, query, album, move, pretend): for item in items: # Item deleted? 
if not os.path.exists(syspath(item.path)): - ui.print_obj(item, lib) - ui.print_(ui.colorize('red', u' deleted')) + ui.print_(format(item)) + ui.print_(ui.colorize('text_error', u' deleted')) if not pretend: item.remove(True) affected_albums.add(item.album_id) @@ -1015,16 +1074,16 @@ def update_items(lib, query, album, move, pretend): # Did the item change since last checked? if item.current_mtime() <= item.mtime: - log.debug(u'skipping {0} because mtime is up to date ({1})' - .format(displayable_path(item.path), item.mtime)) + log.debug(u'skipping {0} because mtime is up to date ({1})', + displayable_path(item.path), item.mtime) continue # Read new data. try: item.read() except library.ReadError as exc: - log.error(u'error reading {0}: {1}'.format( - displayable_path(item.path), exc)) + log.error(u'error reading {0}: {1}', + displayable_path(item.path), exc) continue # Special-case album artist when it matches track artist. (Hacky @@ -1034,27 +1093,28 @@ def update_items(lib, query, album, move, pretend): old_item = lib.get_item(item.id) if old_item.albumartist == old_item.artist == item.artist: item.albumartist = old_item.albumartist - item._dirty.discard('albumartist') + item._dirty.discard(u'albumartist') # Check for and display changes. - changed = ui.show_model_changes(item, - fields=library.Item._media_fields) + changed = ui.show_model_changes( + item, + fields=fields or library.Item._media_fields) # Save changes. if not pretend: if changed: # Move the item if it's in the library. if move and lib.directory in ancestry(item.path): - item.move() + item.move(store=False) - item.store() + item.store(fields=fields) affected_albums.add(item.album_id) else: # The file's mtime was different, but there were no # changes to the metadata. Store the new mtime, # which is set in the call to read(), so we don't # check this again in the future. - item.store() + item.store(fields=fields) # Skip album changes while pretending. 
if pretend: @@ -1066,43 +1126,53 @@ def update_items(lib, query, album, move, pretend): continue album = lib.get_album(album_id) if not album: # Empty albums have already been removed. - log.debug(u'emptied album {0}'.format(album_id)) + log.debug(u'emptied album {0}', album_id) continue first_item = album.items().get() # Update album structure to reflect an item in it. for key in library.Album.item_keys: album[key] = first_item[key] - album.store() + album.store(fields=fields) # Move album art (and any inconsistent items). if move and lib.directory in ancestry(first_item.path): - log.debug(u'moving album {0}'.format(album_id)) - album.move() + log.debug(u'moving album {0}', album_id) + + # Manually moving and storing the album. + items = list(album.items()) + for item in items: + item.move(store=False) + item.store(fields=fields) + album.move(store=False) + album.store(fields=fields) def update_func(lib, opts, args): - update_items(lib, decargs(args), opts.album, opts.move, opts.pretend) + update_items(lib, decargs(args), opts.album, ui.should_move(opts.move), + opts.pretend, opts.fields) update_cmd = ui.Subcommand( - 'update', help='update the library', aliases=('upd', 'up',) + u'update', help=u'update the library', aliases=(u'upd', u'up',) +) +update_cmd.parser.add_album_option() +update_cmd.parser.add_format_option() +update_cmd.parser.add_option( + u'-m', u'--move', action='store_true', dest='move', + help=u"move files in the library directory" ) update_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='match albums instead of tracks' + u'-M', u'--nomove', action='store_false', dest='move', + help=u"don't move files in library" ) update_cmd.parser.add_option( - '-M', '--nomove', action='store_false', default=True, dest='move', - help="don't move files in library" + u'-p', u'--pretend', action='store_true', + help=u"show all changes but do nothing" ) update_cmd.parser.add_option( - '-p', '--pretend', action='store_true', - help="show all 
changes but do nothing" -) -update_cmd.parser.add_option( - '-f', '--format', action='store', - help='print with custom format', default=None + u'-F', u'--field', default=None, action='append', dest='fields', + help=u'list of fields to update' ) update_cmd.func = update_func default_commands.append(update_cmd) @@ -1110,30 +1180,33 @@ default_commands.append(update_cmd) # remove: Remove items from library, delete files. -def remove_items(lib, query, album, delete): +def remove_items(lib, query, album, delete, force): """Remove items matching query from lib. If album, then match and remove whole albums. If delete, also remove files from disk. """ # Get the matching items. items, albums = _do_query(lib, query, album) - # Prepare confirmation with user. - print_() - if delete: - fmt = u'$path - $title' - prompt = 'Really DELETE %i files (y/n)?' % len(items) - else: - fmt = None - prompt = 'Really remove %i items from the library (y/n)?' % \ - len(items) + # Confirm file removal if not forcing removal. + if not force: + # Prepare confirmation with user. + print_() + if delete: + fmt = u'$path - $title' + prompt = u'Really DELETE %i file%s (y/n)?' % \ + (len(items), 's' if len(items) > 1 else '') + else: + fmt = u'' + prompt = u'Really remove %i item%s from the library (y/n)?' % \ + (len(items), 's' if len(items) > 1 else '') - # Show all the items. - for item in items: - ui.print_obj(item, lib, fmt) + # Show all the items. + for item in items: + ui.print_(format(item, fmt)) - # Confirm with user. - if not ui.input_yn(prompt, True): - return + # Confirm with user. + if not ui.input_yn(prompt, True): + return # Remove (and possibly delete) items. 
with lib.transaction(): @@ -1142,20 +1215,21 @@ def remove_items(lib, query, album, delete): def remove_func(lib, opts, args): - remove_items(lib, decargs(args), opts.album, opts.delete) + remove_items(lib, decargs(args), opts.album, opts.delete, opts.force) remove_cmd = ui.Subcommand( - 'remove', help='remove matching items from the library', aliases=('rm',) + u'remove', help=u'remove matching items from the library', aliases=(u'rm',) ) remove_cmd.parser.add_option( - "-d", "--delete", action="store_true", - help="also remove files from disk" + u"-d", u"--delete", action="store_true", + help=u"also remove files from disk" ) remove_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='match albums instead of tracks' + u"-f", u"--force", action="store_true", + help=u"do not ask when removing items" ) +remove_cmd.parser.add_album_option() remove_cmd.func = remove_func default_commands.append(remove_cmd) @@ -1175,7 +1249,10 @@ def show_stats(lib, query, exact): for item in items: if exact: - total_size += os.path.getsize(item.path) + try: + total_size += os.path.getsize(syspath(item.path)) + except OSError as exc: + log.info(u'could not get size of {}: {}', item.path, exc) else: total_size += int(item.length * item.bitrate / 8) total_time += item.length @@ -1185,11 +1262,11 @@ def show_stats(lib, query, exact): if item.album_id: albums.add(item.album_id) - size_str = '' + ui.human_bytes(total_size) + size_str = u'' + ui.human_bytes(total_size) if exact: - size_str += ' ({0} bytes)'.format(total_size) + size_str += u' ({0} bytes)'.format(total_size) - print_("""Tracks: {0} + print_(u"""Tracks: {0} Total time: {1}{2} {3}: {4} Artists: {5} @@ -1197,8 +1274,8 @@ Albums: {6} Album artists: {7}""".format( total_items, ui.human_seconds(total_time), - ' ({0:.2f} seconds)'.format(total_time) if exact else '', - 'Total size' if exact else 'Approximate total size', + u' ({0:.2f} seconds)'.format(total_time) if exact else '', + u'Total size' if exact else 
u'Approximate total size', size_str, len(artists), len(albums), @@ -1211,11 +1288,11 @@ def stats_func(lib, opts, args): stats_cmd = ui.Subcommand( - 'stats', help='show statistics about the library or a query' + u'stats', help=u'show statistics about the library or a query' ) stats_cmd.parser.add_option( - '-e', '--exact', action='store_true', - help='exact size and time' + u'-e', u'--exact', action='store_true', + help=u'exact size and time' ) stats_cmd.func = stats_func default_commands.append(stats_cmd) @@ -1224,17 +1301,18 @@ default_commands.append(stats_cmd) # version: Show current beets version. def show_version(lib, opts, args): - print_('beets version %s' % beets.__version__) + print_(u'beets version %s' % beets.__version__) + print_(u'Python version {}'.format(python_version())) # Show plugins. - names = [p.name for p in plugins.find_plugins()] + names = sorted(p.name for p in plugins.find_plugins()) if names: - print_('plugins:', ', '.join(names)) + print_(u'plugins:', ', '.join(names)) else: - print_('no plugins loaded') + print_(u'no plugins loaded') version_cmd = ui.Subcommand( - 'version', help='output version information' + u'version', help=u'output version information' ) version_cmd.func = show_version default_commands.append(version_cmd) @@ -1261,49 +1339,54 @@ def modify_items(lib, mods, dels, query, write, move, album, confirm): # Apply changes *temporarily*, preview them, and collect modified # objects. - print_('Modifying {0} {1}s.' - .format(len(objs), 'album' if album else 'item')) + print_(u'Modifying {0} {1}s.' + .format(len(objs), u'album' if album else u'item')) changed = set() for obj in objs: - obj.update(mods) - for field in dels: - try: - del obj[field] - except KeyError: - pass - if ui.show_model_changes(obj): + if print_and_modify(obj, mods, dels): changed.add(obj) # Still something to do? if not changed: - print_('No changes to make.') + print_(u'No changes to make.') return # Confirm action. 
if confirm: if write and move: - extra = ', move and write tags' + extra = u', move and write tags' elif write: - extra = ' and write tags' + extra = u' and write tags' elif move: - extra = ' and move' + extra = u' and move' else: - extra = '' + extra = u'' - if not ui.input_yn('Really modify%s (Y/n)?' % extra): - return + changed = ui.input_select_objects( + u'Really modify%s' % extra, changed, + lambda o: print_and_modify(o, mods, dels) + ) # Apply changes to database and files with lib.transaction(): for obj in changed: - if move: - cur_path = obj.path - if lib.directory in ancestry(cur_path): # In library? - log.debug(u'moving object {0}' - .format(displayable_path(cur_path))) - obj.move() + obj.try_sync(write, move) - obj.try_sync(write) + +def print_and_modify(obj, mods, dels): + """Print the modifications to an item and return a bool indicating + whether any changes were made. + + `mods` is a dictionary of fields and values to update on the object; + `dels` is a sequence of fields to delete. 
+ """ + obj.update(mods) + for field in dels: + try: + del obj[field] + except KeyError: + pass + return ui.show_model_changes(obj) def modify_parse_args(args): @@ -1328,39 +1411,35 @@ def modify_parse_args(args): def modify_func(lib, opts, args): query, mods, dels = modify_parse_args(decargs(args)) if not mods and not dels: - raise ui.UserError('no modifications specified') - write = opts.write if opts.write is not None else \ - config['import']['write'].get(bool) - modify_items(lib, mods, dels, query, write, opts.move, opts.album, - not opts.yes) + raise ui.UserError(u'no modifications specified') + modify_items(lib, mods, dels, query, ui.should_write(opts.write), + ui.should_move(opts.move), opts.album, not opts.yes) modify_cmd = ui.Subcommand( - 'modify', help='change metadata fields', aliases=('mod',) + u'modify', help=u'change metadata fields', aliases=(u'mod',) ) modify_cmd.parser.add_option( - '-M', '--nomove', action='store_false', default=True, dest='move', - help="don't move files in library" + u'-m', u'--move', action='store_true', dest='move', + help=u"move files in the library directory" ) modify_cmd.parser.add_option( - '-w', '--write', action='store_true', default=None, - help="write new metadata to files' tags (default)" + u'-M', u'--nomove', action='store_false', dest='move', + help=u"don't move files in library" ) modify_cmd.parser.add_option( - '-W', '--nowrite', action='store_false', dest='write', - help="don't write metadata (opposite of -w)" + u'-w', u'--write', action='store_true', default=None, + help=u"write new metadata to files' tags (default)" ) modify_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='modify whole albums instead of tracks' + u'-W', u'--nowrite', action='store_false', dest='write', + help=u"don't write metadata (opposite of -w)" ) +modify_cmd.parser.add_album_option() +modify_cmd.parser.add_format_option(target='item') modify_cmd.parser.add_option( - '-y', '--yes', action='store_true', - help='skip 
confirmation' -) -modify_cmd.parser.add_option( - '-f', '--format', action='store', - help='print with custom format', default=None + u'-y', u'--yes', action='store_true', + help=u'skip confirmation' ) modify_cmd.func = modify_func default_commands.append(modify_cmd) @@ -1368,7 +1447,7 @@ default_commands.append(modify_cmd) # move: Move/copy files to the library or a new base directory. -def move_items(lib, dest, query, copy, album): +def move_items(lib, dest, query, copy, album, pretend, confirm=False): """Moves or copies items to a new base directory, given by dest. If dest is None, then the library's base directory is used, making the command "consolidate" files. @@ -1376,14 +1455,38 @@ def move_items(lib, dest, query, copy, album): items, albums = _do_query(lib, query, album, False) objs = albums if album else items - action = 'Copying' if copy else 'Moving' - entity = 'album' if album else 'item' - log.info(u'{0} {1} {2}s.'.format(action, len(objs), entity)) - for obj in objs: - log.debug(u'moving: {0}'.format(util.displayable_path(obj.path))) + # Filter out files that don't need to be moved. 
+ isitemmoved = lambda item: item.path != item.destination(basedir=dest) + isalbummoved = lambda album: any(isitemmoved(i) for i in album.items()) + objs = [o for o in objs if (isalbummoved if album else isitemmoved)(o)] - obj.move(copy, basedir=dest) - obj.store() + action = u'Copying' if copy else u'Moving' + act = u'copy' if copy else u'move' + entity = u'album' if album else u'item' + log.info(u'{0} {1} {2}{3}.', action, len(objs), entity, + u's' if len(objs) != 1 else u'') + if not objs: + return + + if pretend: + if album: + show_path_changes([(item.path, item.destination(basedir=dest)) + for obj in objs for item in obj.items()]) + else: + show_path_changes([(obj.path, obj.destination(basedir=dest)) + for obj in objs]) + else: + if confirm: + objs = ui.input_select_objects( + u'Really %s' % act, objs, + lambda o: show_path_changes( + [(o.path, o.destination(basedir=dest))])) + + for obj in objs: + log.debug(u'moving: {0}', util.displayable_path(obj.path)) + + obj.move(copy, basedir=dest) + obj.store() def move_func(lib, opts, args): @@ -1391,26 +1494,32 @@ def move_func(lib, opts, args): if dest is not None: dest = normpath(dest) if not os.path.isdir(dest): - raise ui.UserError('no such directory: %s' % dest) + raise ui.UserError(u'no such directory: %s' % dest) - move_items(lib, dest, decargs(args), opts.copy, opts.album) + move_items(lib, dest, decargs(args), opts.copy, opts.album, opts.pretend, + opts.timid) move_cmd = ui.Subcommand( - 'move', help='move or copy items', aliases=('mv',) + u'move', help=u'move or copy items', aliases=(u'mv',) ) move_cmd.parser.add_option( - '-d', '--dest', metavar='DIR', dest='dest', - help='destination directory' + u'-d', u'--dest', metavar='DIR', dest='dest', + help=u'destination directory' ) move_cmd.parser.add_option( - '-c', '--copy', default=False, action='store_true', - help='copy instead of moving' + u'-c', u'--copy', default=False, action='store_true', + help=u'copy instead of moving' ) move_cmd.parser.add_option( - 
'-a', '--album', default=False, action='store_true', - help='match whole albums instead of tracks' + u'-p', u'--pretend', default=False, action='store_true', + help=u'show how files would be moved, but don\'t touch anything' ) +move_cmd.parser.add_option( + u'-t', u'--timid', dest='timid', action='store_true', + help=u'always confirm all actions' +) +move_cmd.parser.add_album_option() move_cmd.func = move_func default_commands.append(move_cmd) @@ -1426,39 +1535,38 @@ def write_items(lib, query, pretend, force): for item in items: # Item deleted? if not os.path.exists(syspath(item.path)): - log.info(u'missing file: {0}'.format( - util.displayable_path(item.path) - )) + log.info(u'missing file: {0}', util.displayable_path(item.path)) continue # Get an Item object reflecting the "clean" (on-disk) state. try: clean_item = library.Item.from_path(item.path) except library.ReadError as exc: - log.error(u'error reading {0}: {1}'.format( - displayable_path(item.path), exc - )) + log.error(u'error reading {0}: {1}', + displayable_path(item.path), exc) continue # Check for and display changes. changed = ui.show_model_changes(item, clean_item, - library.Item._media_fields, force) + library.Item._media_tag_fields, force) if (changed or force) and not pretend: - item.try_sync() + # We use `try_sync` here to keep the mtime up to date in the + # database. 
+ item.try_sync(True, False) def write_func(lib, opts, args): write_items(lib, decargs(args), opts.pretend, opts.force) -write_cmd = ui.Subcommand('write', help='write tag information to files') +write_cmd = ui.Subcommand(u'write', help=u'write tag information to files') write_cmd.parser.add_option( - '-p', '--pretend', action='store_true', - help="show all changes but do nothing" + u'-p', u'--pretend', action='store_true', + help=u"show all changes but do nothing" ) write_cmd.parser.add_option( - '-f', '--force', action='store_true', - help="write tags even if the existing tags match the database" + u'-f', u'--force', action='store_true', + help=u"write tags even if the existing tags match the database" ) write_cmd.func = write_func default_commands.append(write_cmd) @@ -1486,7 +1594,7 @@ def config_func(lib, opts, args): filenames.insert(0, user_path) for filename in filenames: - print(filename) + print_(displayable_path(filename)) # Open in editor. elif opts.edit: @@ -1494,52 +1602,44 @@ def config_func(lib, opts, args): # Dump configuration. else: - print(config.dump(full=opts.defaults)) + config_out = config.dump(full=opts.defaults, redact=opts.redact) + print_(util.text_string(config_out)) def config_edit(): """Open a program to edit the user configuration. + An empty config file is created if no existing config file exists. """ path = config.user_config_path() - - if 'EDITOR' in os.environ: - editor = os.environ['EDITOR'] - try: - editor = shlex.split(editor) - except ValueError: # Malformed shell tokens. - editor = [editor] - args = editor + [path] - args.insert(1, args[0]) - elif platform.system() == 'Darwin': - args = ['open', 'open', '-n', path] - elif platform.system() == 'Windows': - # On windows we can execute arbitrary files. 
The os will - # take care of starting an appropriate application - args = [path, path] - else: - # Assume Unix - args = ['xdg-open', 'xdg-open', path] - + editor = util.editor_command() try: - os.execlp(*args) - except OSError: - raise ui.UserError("Could not edit configuration. Please " - "set the EDITOR environment variable.") + if not os.path.isfile(path): + open(path, 'w+').close() + util.interactive_open([path], editor) + except OSError as exc: + message = u"Could not edit configuration: {0}".format(exc) + if not editor: + message += u". Please set the EDITOR environment variable" + raise ui.UserError(message) - -config_cmd = ui.Subcommand('config', - help='show or edit the user configuration') +config_cmd = ui.Subcommand(u'config', + help=u'show or edit the user configuration') config_cmd.parser.add_option( - '-p', '--paths', action='store_true', - help='show files that configuration was loaded from' + u'-p', u'--paths', action='store_true', + help=u'show files that configuration was loaded from' ) config_cmd.parser.add_option( - '-e', '--edit', action='store_true', - help='edit user configuration with $EDITOR' + u'-e', u'--edit', action='store_true', + help=u'edit user configuration with $EDITOR' ) config_cmd.parser.add_option( - '-d', '--defaults', action='store_true', - help='include the default configuration' + u'-d', u'--defaults', action='store_true', + help=u'include the default configuration' +) +config_cmd.parser.add_option( + u'-c', u'--clear', action='store_false', + dest='redact', default=True, + help=u'do not redact sensitive fields' ) config_cmd.func = config_func default_commands.append(config_cmd) @@ -1549,17 +1649,19 @@ default_commands.append(config_cmd) def print_completion(*args): for line in completion_script(default_commands + plugins.commands()): - print(line, end='') + print_(line, end=u'') if not any(map(os.path.isfile, BASH_COMPLETION_PATHS)): - log.warn(u'Warning: Unable to find the bash-completion package. 
' - u'Command line completion might not work.') + log.warning(u'Warning: Unable to find the bash-completion package. ' + u'Command line completion might not work.') BASH_COMPLETION_PATHS = map(syspath, [ u'/etc/bash_completion', u'/usr/share/bash-completion/bash_completion', - u'/usr/share/local/bash-completion/bash_completion', - u'/opt/local/share/bash-completion/bash_completion', # SmartOS - u'/usr/local/etc/bash_completion', # Homebrew + u'/usr/local/share/bash-completion/bash_completion', + # SmartOS + u'/opt/local/share/bash-completion/bash_completion', + # Homebrew (before bash-completion2) + u'/usr/local/etc/bash_completion', ]) @@ -1571,7 +1673,7 @@ def completion_script(commands): """ base_script = os.path.join(_package_path('beets.ui'), 'completion_base.sh') with open(base_script, 'r') as base_script: - yield base_script.read() + yield util.text_string(base_script.read()) options = {} aliases = {} @@ -1586,12 +1688,12 @@ def completion_script(commands): if re.match(r'^\w+$', alias): aliases[alias] = name - options[name] = {'flags': [], 'opts': []} + options[name] = {u'flags': [], u'opts': []} for opts in cmd.parser._get_all_options()[1:]: if opts.action in ('store_true', 'store_false'): - option_type = 'flags' + option_type = u'flags' else: - option_type = 'opts' + option_type = u'opts' options[name][option_type].extend( opts._short_opts + opts._long_opts @@ -1599,47 +1701,52 @@ def completion_script(commands): # Add global options options['_global'] = { - 'flags': ['-v', '--verbose'], - 'opts': '-l --library -c --config -d --directory -h --help'.split(' ') + u'flags': [u'-v', u'--verbose'], + u'opts': + u'-l --library -c --config -d --directory -h --help'.split(u' ') } # Add flags common to all commands options['_common'] = { - 'flags': ['-h', '--help'] + u'flags': [u'-h', u'--help'] } # Start generating the script - yield "_beet() {\n" + yield u"_beet() {\n" # Command names - yield " local commands='%s'\n" % ' '.join(command_names) - yield "\n" + yield 
u" local commands='%s'\n" % ' '.join(command_names) + yield u"\n" # Command aliases - yield " local aliases='%s'\n" % ' '.join(aliases.keys()) + yield u" local aliases='%s'\n" % ' '.join(aliases.keys()) for alias, cmd in aliases.items(): - yield " local alias__%s=%s\n" % (alias, cmd) - yield '\n' + yield u" local alias__%s=%s\n" % (alias, cmd) + yield u'\n' # Fields - yield " fields='%s'\n" % ' '.join( - set(library.Item._fields.keys() + library.Album._fields.keys()) + yield u" fields='%s'\n" % ' '.join( + set( + list(library.Item._fields.keys()) + + list(library.Album._fields.keys()) + ) ) # Command options for cmd, opts in options.items(): for option_type, option_list in opts.items(): if option_list: - option_list = ' '.join(option_list) - yield " local %s__%s='%s'\n" % (option_type, cmd, option_list) + option_list = u' '.join(option_list) + yield u" local %s__%s='%s'\n" % ( + option_type, cmd, option_list) - yield ' _beet_dispatch\n' - yield '}\n' + yield u' _beet_dispatch\n' + yield u'}\n' completion_cmd = ui.Subcommand( 'completion', - help='print shell script that provides command line completion' + help=u'print shell script that provides command line completion' ) completion_cmd.func = print_completion completion_cmd.hide = True diff --git a/lib/beets/ui/completion_base.sh b/lib/beets/ui/completion_base.sh old mode 100644 new mode 100755 diff --git a/lib/beets/util/__init__.py b/lib/beets/util/__init__.py old mode 100644 new mode 100755 index 38cecd70..f6cd488d --- a/lib/beets/util/__init__.py +++ b/lib/beets/util/__init__.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -13,21 +14,28 @@ # included in all copies or substantial portions of the Software. 
"""Miscellaneous utility functions.""" -from __future__ import division +from __future__ import division, absolute_import, print_function import os import sys +import errno +import locale import re import shutil import fnmatch -from collections import defaultdict +from collections import Counter import traceback import subprocess import platform +import shlex +from beets.util import hidden +import six +from unidecode import unidecode MAX_FILENAME_LENGTH = 200 WINDOWS_MAGIC_PREFIX = u'\\\\?\\' +SNI_SUPPORTED = sys.version_info >= (2, 7, 9) class HumanReadableException(Exception): @@ -54,22 +62,22 @@ class HumanReadableException(Exception): def _gerund(self): """Generate a (likely) gerund form of the English verb. """ - if ' ' in self.verb: + if u' ' in self.verb: return self.verb - gerund = self.verb[:-1] if self.verb.endswith('e') else self.verb - gerund += 'ing' + gerund = self.verb[:-1] if self.verb.endswith(u'e') else self.verb + gerund += u'ing' return gerund def _reasonstr(self): """Get the reason as a string.""" - if isinstance(self.reason, unicode): + if isinstance(self.reason, six.text_type): return self.reason - elif isinstance(self.reason, basestring): # Byte string. 
- return self.reason.decode('utf8', 'ignore') + elif isinstance(self.reason, bytes): + return self.reason.decode('utf-8', 'ignore') elif hasattr(self.reason, 'strerror'): # i.e., EnvironmentError return self.reason.strerror else: - return u'"{0}"'.format(unicode(self.reason)) + return u'"{0}"'.format(six.text_type(self.reason)) def get_message(self): """Create the human-readable description of the error, sans @@ -83,7 +91,7 @@ class HumanReadableException(Exception): """ if self.tb: logger.debug(self.tb) - logger.error(u'{0}: {1}'.format(self.error_kind, self.args[0])) + logger.error(u'{0}: {1}', self.error_kind, self.args[0]) class FilesystemError(HumanReadableException): @@ -149,21 +157,22 @@ def ancestry(path): return out -def sorted_walk(path, ignore=(), logger=None): +def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): """Like `os.walk`, but yields things in case-insensitive sorted, breadth-first order. Directory and file names matching any glob pattern in `ignore` are skipped. If `logger` is provided, then warning messages are logged there when a directory cannot be listed. """ - # Make sure the path isn't a Unicode string. + # Make sure the pathes aren't Unicode strings. path = bytestring_path(path) + ignore = [bytestring_path(i) for i in ignore] # Get all the directories and files at this level. try: contents = os.listdir(syspath(path)) except OSError as exc: if logger: - logger.warn(u'could not list directory {0}: {1}'.format( + logger.warning(u'could not list directory {0}: {1}'.format( displayable_path(path), exc.strerror )) return @@ -183,10 +192,11 @@ def sorted_walk(path, ignore=(), logger=None): # Add to output as either a file or a directory. 
cur = os.path.join(path, base) - if os.path.isdir(syspath(cur)): - dirs.append(base) - else: - files.append(base) + if (ignore_hidden and not hidden.is_hidden(cur)) or not ignore_hidden: + if os.path.isdir(syspath(cur)): + dirs.append(base) + else: + files.append(base) # Sort lists (case-insensitive) and yield the current level. dirs.sort(key=bytes.lower) @@ -197,7 +207,7 @@ def sorted_walk(path, ignore=(), logger=None): for base in dirs: cur = os.path.join(path, base) # yield from sorted_walk(...) - for res in sorted_walk(cur, ignore, logger): + for res in sorted_walk(cur, ignore, ignore_hidden, logger): yield res @@ -260,7 +270,9 @@ def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): if not os.path.exists(directory): # Directory gone already. continue - if fnmatch_all(os.listdir(directory), clutter): + clutter = [bytestring_path(c) for c in clutter] + match_paths = [bytestring_path(d) for d in os.listdir(directory)] + if fnmatch_all(match_paths, clutter): # Directory contains only clutter (or nothing). try: shutil.rmtree(directory) @@ -294,6 +306,18 @@ def components(path): return comps +def arg_encoding(): + """Get the encoding for command-line arguments (and other OS + locale-sensitive strings). + """ + try: + return locale.getdefaultlocale()[1] or 'utf-8' + except ValueError: + # Invalid locale environment variable setting. To avoid + # failing entirely for no good reason, assume UTF-8. + return 'utf-8' + + def _fsencoding(): """Get the system's filesystem encoding. On Windows, this is always UTF-8 (not MBCS). @@ -305,16 +329,16 @@ def _fsencoding(): # for Windows paths, so the encoding is actually immaterial so # we can avoid dealing with this nastiness. We arbitrarily # choose UTF-8. 
- encoding = 'utf8' + encoding = 'utf-8' return encoding def bytestring_path(path): - """Given a path, which is either a str or a unicode, returns a str + """Given a path, which is either a bytes or a unicode, returns a str path (ensuring that we never deal with Unicode pathnames). """ # Pass through bytestrings. - if isinstance(path, str): + if isinstance(path, bytes): return path # On Windows, remove the magic prefix added by `syspath`. This makes @@ -323,11 +347,14 @@ def bytestring_path(path): if os.path.__name__ == 'ntpath' and path.startswith(WINDOWS_MAGIC_PREFIX): path = path[len(WINDOWS_MAGIC_PREFIX):] - # Try to encode with default encodings, but fall back to UTF8. + # Try to encode with default encodings, but fall back to utf-8. try: return path.encode(_fsencoding()) except (UnicodeError, LookupError): - return path.encode('utf8') + return path.encode('utf-8') + + +PATH_SEP = bytestring_path(os.sep) def displayable_path(path, separator=u'; '): @@ -337,16 +364,16 @@ def displayable_path(path, separator=u'; '): """ if isinstance(path, (list, tuple)): return separator.join(displayable_path(p) for p in path) - elif isinstance(path, unicode): + elif isinstance(path, six.text_type): return path - elif not isinstance(path, str): + elif not isinstance(path, bytes): # A non-string object: just get its unicode representation. - return unicode(path) + return six.text_type(path) try: return path.decode(_fsencoding(), 'ignore') except (UnicodeError, LookupError): - return path.decode('utf8', 'ignore') + return path.decode('utf-8', 'ignore') def syspath(path, prefix=True): @@ -360,12 +387,12 @@ def syspath(path, prefix=True): if os.path.__name__ != 'ntpath': return path - if not isinstance(path, unicode): + if not isinstance(path, six.text_type): # Beets currently represents Windows paths internally with UTF-8 # arbitrarily. But earlier versions used MBCS because it is # reported as the FS encoding by Windows. Try both. 
try: - path = path.decode('utf8') + path = path.decode('utf-8') except UnicodeError: # The encoding should always be MBCS, Windows' broken # Unicode representation. @@ -412,7 +439,7 @@ def copy(path, dest, replace=False): path = syspath(path) dest = syspath(dest) if not replace and os.path.exists(dest): - raise FilesystemError('file exists', 'copy', (path, dest)) + raise FilesystemError(u'file exists', 'copy', (path, dest)) try: shutil.copyfile(path, dest) except (OSError, IOError) as exc: @@ -433,8 +460,7 @@ def move(path, dest, replace=False): path = syspath(path) dest = syspath(dest) if os.path.exists(dest) and not replace: - raise FilesystemError('file exists', 'rename', (path, dest), - traceback.format_exc()) + raise FilesystemError(u'file exists', 'rename', (path, dest)) # First, try renaming the file. try: @@ -452,23 +478,52 @@ def move(path, dest, replace=False): def link(path, dest, replace=False): """Create a symbolic link from path to `dest`. Raises an OSError if `dest` already exists, unless `replace` is True. Does nothing if - `path` == `dest`.""" - if (samefile(path, dest)): + `path` == `dest`. + """ + if samefile(path, dest): return - path = syspath(path) - dest = syspath(dest) - if os.path.exists(dest) and not replace: - raise FilesystemError('file exists', 'rename', (path, dest), - traceback.format_exc()) + if os.path.exists(syspath(dest)) and not replace: + raise FilesystemError(u'file exists', 'rename', (path, dest)) try: - os.symlink(path, dest) - except OSError: - raise FilesystemError('Operating system does not support symbolic ' - 'links.', 'link', (path, dest), + os.symlink(syspath(path), syspath(dest)) + except NotImplementedError: + # raised on python >= 3.2 and Windows versions before Vista + raise FilesystemError(u'OS does not support symbolic links.' 
+ 'link', (path, dest), traceback.format_exc()) + except OSError as exc: + # TODO: Windows version checks can be removed for python 3 + if hasattr('sys', 'getwindowsversion'): + if sys.getwindowsversion()[0] < 6: # is before Vista + exc = u'OS does not support symbolic links.' + raise FilesystemError(exc, 'link', (path, dest), traceback.format_exc()) +def hardlink(path, dest, replace=False): + """Create a hard link from path to `dest`. Raises an OSError if + `dest` already exists, unless `replace` is True. Does nothing if + `path` == `dest`. + """ + if samefile(path, dest): + return + + if os.path.exists(syspath(dest)) and not replace: + raise FilesystemError(u'file exists', 'rename', (path, dest)) + try: + os.link(syspath(path), syspath(dest)) + except NotImplementedError: + raise FilesystemError(u'OS does not support hard links.' + 'link', (path, dest), traceback.format_exc()) + except OSError as exc: + if exc.errno == errno.EXDEV: + raise FilesystemError(u'Cannot hard link across devices.' + 'link', (path, dest), traceback.format_exc()) + else: + raise FilesystemError(exc, 'link', (path, dest), + traceback.format_exc()) + + def unique_path(path): """Returns a version of ``path`` that does not exist on the filesystem. Specifically, if ``path` itself already exists, then @@ -478,7 +533,7 @@ def unique_path(path): return path base, ext = os.path.splitext(path) - match = re.search(r'\.(\d)+$', base) + match = re.search(br'\.(\d)+$', base) if match: num = int(match.group(1)) base = base[:match.start()] @@ -486,7 +541,8 @@ def unique_path(path): num = 0 while True: num += 1 - new_path = '%s.%i%s' % (base, num, ext) + suffix = u'.{}'.format(num).encode() + ext + new_path = base + suffix if not os.path.exists(new_path): return new_path @@ -495,12 +551,12 @@ def unique_path(path): # shares, which are sufficiently common as to cause frequent problems. 
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx CHAR_REPLACE = [ - (re.compile(ur'[\\/]'), u'_'), # / and \ -- forbidden everywhere. - (re.compile(ur'^\.'), u'_'), # Leading dot (hidden files on Unix). - (re.compile(ur'[\x00-\x1f]'), u''), # Control characters. - (re.compile(ur'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters". - (re.compile(ur'\.$'), u'_'), # Trailing dots. - (re.compile(ur'\s+$'), u''), # Trailing whitespace. + (re.compile(r'[\\/]'), u'_'), # / and \ -- forbidden everywhere. + (re.compile(r'^\.'), u'_'), # Leading dot (hidden files on Unix). + (re.compile(r'[\x00-\x1f]'), u''), # Control characters. + (re.compile(r'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters". + (re.compile(r'\.$'), u'_'), # Trailing dots. + (re.compile(r'\s+$'), u''), # Trailing whitespace. ] @@ -542,73 +598,142 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH): return os.path.join(*out) +def _legalize_stage(path, replacements, length, extension, fragment): + """Perform a single round of path legalization steps + (sanitation/replacement, encoding from Unicode to bytes, + extension-appending, and truncation). Return the path (Unicode if + `fragment` is set, `bytes` otherwise) and whether truncation was + required. + """ + # Perform an initial sanitization including user replacements. + path = sanitize_path(path, replacements) + + # Encode for the filesystem. + if not fragment: + path = bytestring_path(path) + + # Preserve extension. + path += extension.lower() + + # Truncate too-long components. + pre_truncate_path = path + path = truncate_path(path, length) + + return path, path != pre_truncate_path + + +def legalize_path(path, replacements, length, extension, fragment): + """Given a path-like Unicode string, produce a legal path. Return + the path and a flag indicating whether some replacements had to be + ignored (see below). 
+ + The legalization process (see `_legalize_stage`) consists of + applying the sanitation rules in `replacements`, encoding the string + to bytes (unless `fragment` is set), truncating components to + `length`, appending the `extension`. + + This function performs up to three calls to `_legalize_stage` in + case truncation conflicts with replacements (as can happen when + truncation creates whitespace at the end of the string, for + example). The limited number of iterations iterations avoids the + possibility of an infinite loop of sanitation and truncation + operations, which could be caused by replacement rules that make the + string longer. The flag returned from this function indicates that + the path has to be truncated twice (indicating that replacements + made the string longer again after it was truncated); the + application should probably log some sort of warning. + """ + + if fragment: + # Outputting Unicode. + extension = extension.decode('utf-8', 'ignore') + + first_stage_path, _ = _legalize_stage( + path, replacements, length, extension, fragment + ) + + # Convert back to Unicode with extension removed. + first_stage_path, _ = os.path.splitext(displayable_path(first_stage_path)) + + # Re-sanitize following truncation (including user replacements). + second_stage_path, retruncated = _legalize_stage( + first_stage_path, replacements, length, extension, fragment + ) + + # If the path was once again truncated, discard user replacements + # and run through one last legalization stage. + if retruncated: + second_stage_path, _ = _legalize_stage( + first_stage_path, None, length, extension, fragment + ) + + return second_stage_path, retruncated + + +def py3_path(path): + """Convert a bytestring path to Unicode on Python 3 only. On Python + 2, return the bytestring path unchanged. + + This helps deal with APIs on Python 3 that *only* accept Unicode + (i.e., `str` objects). 
I philosophically disagree with this + decision, because paths are sadly bytes on Unix, but that's the way + it is. So this function helps us "smuggle" the true bytes data + through APIs that took Python 3's Unicode mandate too seriously. + """ + if isinstance(path, six.text_type): + return path + assert isinstance(path, bytes) + if six.PY2: + return path + return os.fsdecode(path) + + def str2bool(value): """Returns a boolean reflecting a human-entered string.""" - if value.lower() in ('yes', '1', 'true', 't', 'y'): - return True - else: - return False + return value.lower() in (u'yes', u'1', u'true', u't', u'y') def as_string(value): """Convert a value to a Unicode object for matching with a query. None becomes the empty string. Bytestrings are silently decoded. """ + if six.PY2: + buffer_types = buffer, memoryview # noqa: F821 + else: + buffer_types = memoryview + if value is None: return u'' - elif isinstance(value, buffer): - return str(value).decode('utf8', 'ignore') - elif isinstance(value, str): - return value.decode('utf8', 'ignore') + elif isinstance(value, buffer_types): + return bytes(value).decode('utf-8', 'ignore') + elif isinstance(value, bytes): + return value.decode('utf-8', 'ignore') else: - return unicode(value) + return six.text_type(value) -def levenshtein(s1, s2): - """A nice DP edit distance implementation from Wikibooks: - http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/ - Levenshtein_distance#Python +def text_string(value, encoding='utf-8'): + """Convert a string, which can either be bytes or unicode, to + unicode. + + Text (unicode) is left untouched; bytes are decoded. This is useful + to convert from a "native string" (bytes on Python 2, str on Python + 3) to a consistently unicode value. 
""" - if len(s1) < len(s2): - return levenshtein(s2, s1) - if not s1: - return len(s2) - - previous_row = xrange(len(s2) + 1) - for i, c1 in enumerate(s1): - current_row = [i + 1] - for j, c2 in enumerate(s2): - insertions = previous_row[j + 1] + 1 - deletions = current_row[j] + 1 - substitutions = previous_row[j] + (c1 != c2) - current_row.append(min(insertions, deletions, substitutions)) - previous_row = current_row - - return previous_row[-1] + if isinstance(value, bytes): + return value.decode(encoding) + return value def plurality(objs): - """Given a sequence of comparable objects, returns the object that - is most common in the set and the frequency of that object. The + """Given a sequence of hashble objects, returns the object that + is most common in the set and the its number of appearance. The sequence must contain at least one object. """ - # Calculate frequencies. - freqs = defaultdict(int) - for obj in objs: - freqs[obj] += 1 - - if not freqs: - raise ValueError('sequence must be non-empty') - - # Find object with maximum frequency. 
- max_freq = 0 - res = None - for obj, freq in freqs.items(): - if freq > max_freq: - max_freq = freq - res = obj - - return res, max_freq + c = Counter(objs) + if not c: + raise ValueError(u'sequence must be non-empty') + return c.most_common(1)[0] def cpu_count(): @@ -624,8 +749,8 @@ def cpu_count(): num = 0 elif sys.platform == 'darwin': try: - num = int(command_output(['sysctl', '-n', 'hw.ncpu'])) - except ValueError: + num = int(command_output(['/usr/sbin/sysctl', '-n', 'hw.ncpu'])) + except (ValueError, OSError, subprocess.CalledProcessError): num = 0 else: try: @@ -638,21 +763,40 @@ def cpu_count(): return 1 +def convert_command_args(args): + """Convert command arguments to bytestrings on Python 2 and + surrogate-escaped strings on Python 3.""" + assert isinstance(args, list) + + def convert(arg): + if six.PY2: + if isinstance(arg, six.text_type): + arg = arg.encode(arg_encoding()) + else: + if isinstance(arg, bytes): + arg = arg.decode(arg_encoding(), 'surrogateescape') + return arg + + return [convert(a) for a in args] + + def command_output(cmd, shell=False): """Runs the command and returns its output after it has exited. - ``cmd`` is a list of arguments starting with the command names. If - ``shell`` is true, ``cmd`` is assumed to be a string and passed to a + ``cmd`` is a list of arguments starting with the command names. The + arguments are bytes on Unix and strings on Windows. + If ``shell`` is true, ``cmd`` is assumed to be a string and passed to a shell to execute. If the process exits with a non-zero return code ``subprocess.CalledProcessError`` is raised. May also raise ``OSError``. - This replaces `subprocess.check_output`, which isn't available in - Python 2.6 and which can have problems if lots of output is sent to - stderr. + This replaces `subprocess.check_output` which can have problems if lots of + output is sent to stderr. 
""" + cmd = convert_command_args(cmd) + proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, @@ -665,6 +809,7 @@ def command_output(cmd, shell=False): raise subprocess.CalledProcessError( returncode=proc.returncode, cmd=' '.join(cmd), + output=stdout + stderr, ) return stdout @@ -684,3 +829,165 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH): return min(res[9], limit) else: return limit + + +def open_anything(): + """Return the system command that dispatches execution to the correct + program. + """ + sys_name = platform.system() + if sys_name == 'Darwin': + base_cmd = 'open' + elif sys_name == 'Windows': + base_cmd = 'start' + else: # Assume Unix + base_cmd = 'xdg-open' + return base_cmd + + +def editor_command(): + """Get a command for opening a text file. + + Use the `EDITOR` environment variable by default. If it is not + present, fall back to `open_anything()`, the platform-specific tool + for opening files in general. + """ + editor = os.environ.get('EDITOR') + if editor: + return editor + return open_anything() + + +def shlex_split(s): + """Split a Unicode or bytes string according to shell lexing rules. + + Raise `ValueError` if the string is not a well-formed shell string. + This is a workaround for a bug in some versions of Python. + """ + if not six.PY2 or isinstance(s, bytes): # Shlex works fine. + return shlex.split(s) + + elif isinstance(s, six.text_type): + # Work around a Python bug. + # http://bugs.python.org/issue6988 + bs = s.encode('utf-8') + return [c.decode('utf-8') for c in shlex.split(bs)] + + else: + raise TypeError(u'shlex_split called with non-string') + + +def interactive_open(targets, command): + """Open the files in `targets` by `exec`ing a new `command`, given + as a Unicode string. (The new program takes over, and Python + execution ends: this does not fork a subprocess.) + + Can raise `OSError`. + """ + assert command + + # Split the command string into its arguments. 
+ try: + args = shlex_split(command) + except ValueError: # Malformed shell tokens. + args = [command] + + args.insert(0, args[0]) # for argv[0] + + args += targets + + return os.execlp(*args) + + +def _windows_long_path_name(short_path): + """Use Windows' `GetLongPathNameW` via ctypes to get the canonical, + long path given a short filename. + """ + if not isinstance(short_path, six.text_type): + short_path = short_path.decode(_fsencoding()) + + import ctypes + buf = ctypes.create_unicode_buffer(260) + get_long_path_name_w = ctypes.windll.kernel32.GetLongPathNameW + return_value = get_long_path_name_w(short_path, buf, 260) + + if return_value == 0 or return_value > 260: + # An error occurred + return short_path + else: + long_path = buf.value + # GetLongPathNameW does not change the case of the drive + # letter. + if len(long_path) > 1 and long_path[1] == ':': + long_path = long_path[0].upper() + long_path[1:] + return long_path + + +def case_sensitive(path): + """Check whether the filesystem at the given path is case sensitive. + + To work best, the path should point to a file or a directory. If the path + does not exist, assume a case sensitive file system on every platform + except Windows. + """ + # A fallback in case the path does not exist. + if not os.path.exists(syspath(path)): + # By default, the case sensitivity depends on the platform. + return platform.system() != 'Windows' + + # If an upper-case version of the path exists but a lower-case + # version does not, then the filesystem must be case-sensitive. + # (Otherwise, we have more work to do.) + if not (os.path.exists(syspath(path.lower())) and + os.path.exists(syspath(path.upper()))): + return True + + # Both versions of the path exist on the file system. Check whether + # they refer to different files by their inodes. Alas, + # `os.path.samefile` is only available on Unix systems on Python 2. 
+ if platform.system() != 'Windows': + return not os.path.samefile(syspath(path.lower()), + syspath(path.upper())) + + # On Windows, we check whether the canonical, long filenames for the + # files are the same. + lower = _windows_long_path_name(path.lower()) + upper = _windows_long_path_name(path.upper()) + return lower != upper + + +def raw_seconds_short(string): + """Formats a human-readable M:SS string as a float (number of seconds). + + Raises ValueError if the conversion cannot take place due to `string` not + being in the right format. + """ + match = re.match(r'^(\d+):([0-5]\d)$', string) + if not match: + raise ValueError(u'String not in M:SS format') + minutes, seconds = map(int, match.groups()) + return float(minutes * 60 + seconds) + + +def asciify_path(path, sep_replace): + """Decodes all unicode characters in a path into ASCII equivalents. + + Substitutions are provided by the unidecode module. Path separators in the + input are preserved. + + Keyword arguments: + path -- The path to be asciified. + sep_replace -- the string to be used to replace extraneous path separators. + """ + # if this platform has an os.altsep, change it to os.sep. + if os.altsep: + path = path.replace(os.altsep, os.sep) + path_components = path.split(os.sep) + for index, item in enumerate(path_components): + path_components[index] = unidecode(item).replace(os.sep, sep_replace) + if os.altsep: + path_components[index] = unidecode(item).replace( + os.altsep, + sep_replace + ) + return os.sep.join(path_components) diff --git a/lib/beets/util/artresizer.py b/lib/beets/util/artresizer.py old mode 100644 new mode 100755 index f17fdc5b..e84b775d --- a/lib/beets/util/artresizer.py +++ b/lib/beets/util/artresizer.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. 
-# Copyright 2014, Fabrice Laporte +# Copyright 2016, Fabrice Laporte # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -15,20 +16,26 @@ """Abstraction layer to resize images using PIL, ImageMagick, or a public resizing proxy if neither is available. """ -import urllib +from __future__ import division, absolute_import, print_function + import subprocess import os import re from tempfile import NamedTemporaryFile -import logging +from six.moves.urllib.parse import urlencode +from beets import logging from beets import util +import six # Resizing methods PIL = 1 IMAGEMAGICK = 2 WEBPROXY = 3 -PROXY_URL = 'http://images.weserv.nl/' +if util.SNI_SUPPORTED: + PROXY_URL = 'https://images.weserv.nl/' +else: + PROXY_URL = 'http://images.weserv.nl/' log = logging.getLogger('beets') @@ -37,9 +44,9 @@ def resize_url(url, maxwidth): """Return a proxied image URL that resizes the original image to maxwidth (preserving aspect ratio). """ - return '{0}?{1}'.format(PROXY_URL, urllib.urlencode({ + return '{0}?{1}'.format(PROXY_URL, urlencode({ 'url': url.replace('http://', ''), - 'w': str(maxwidth), + 'w': maxwidth, })) @@ -48,8 +55,8 @@ def temp_file_for(path): specified path. 
""" ext = os.path.splitext(path)[1] - with NamedTemporaryFile(suffix=ext, delete=False) as f: - return f.name + with NamedTemporaryFile(suffix=util.py3_path(ext), delete=False) as f: + return util.bytestring_path(f.name) def pil_resize(maxwidth, path_in, path_out=None): @@ -58,9 +65,8 @@ def pil_resize(maxwidth, path_in, path_out=None): """ path_out = path_out or temp_file_for(path_in) from PIL import Image - log.debug(u'artresizer: PIL resizing {0} to {1}'.format( - util.displayable_path(path_in), util.displayable_path(path_out) - )) + log.debug(u'artresizer: PIL resizing {0} to {1}', + util.displayable_path(path_in), util.displayable_path(path_out)) try: im = Image.open(util.syspath(path_in)) @@ -69,9 +75,8 @@ def pil_resize(maxwidth, path_in, path_out=None): im.save(path_out) return path_out except IOError: - log.error(u"PIL cannot create thumbnail for '{0}'".format( - util.displayable_path(path_in) - )) + log.error(u"PIL cannot create thumbnail for '{0}'", + util.displayable_path(path_in)) return path_in @@ -80,9 +85,8 @@ def im_resize(maxwidth, path_in, path_out=None): Return the output path of resized image. """ path_out = path_out or temp_file_for(path_in) - log.debug(u'artresizer: ImageMagick resizing {0} to {1}'.format( - util.displayable_path(path_in), util.displayable_path(path_out) - )) + log.debug(u'artresizer: ImageMagick resizing {0} to {1}', + util.displayable_path(path_in), util.displayable_path(path_out)) # "-resize widthxheight>" shrinks images with dimension(s) larger # than the corresponding width and/or height dimension(s). The > @@ -90,13 +94,13 @@ def im_resize(maxwidth, path_in, path_out=None): # compatibility. 
try: util.command_output([ - 'convert', util.syspath(path_in), - '-resize', '{0}x^>'.format(maxwidth), path_out + 'convert', util.syspath(path_in, prefix=False), + '-resize', '{0}x^>'.format(maxwidth), + util.syspath(path_out, prefix=False), ]) except subprocess.CalledProcessError: - log.warn(u'artresizer: IM convert failed for {0}'.format( - util.displayable_path(path_in) - )) + log.warning(u'artresizer: IM convert failed for {0}', + util.displayable_path(path_in)) return path_in return path_out @@ -107,34 +111,67 @@ BACKEND_FUNCS = { } +def pil_getsize(path_in): + from PIL import Image + try: + im = Image.open(util.syspath(path_in)) + return im.size + except IOError as exc: + log.error(u"PIL could not read file {}: {}", + util.displayable_path(path_in), exc) + + +def im_getsize(path_in): + cmd = ['identify', '-format', '%w %h', + util.syspath(path_in, prefix=False)] + try: + out = util.command_output(cmd) + except subprocess.CalledProcessError as exc: + log.warning(u'ImageMagick size query failed') + log.debug( + u'`convert` exited with (status {}) when ' + u'getting size with command {}:\n{}', + exc.returncode, cmd, exc.output.strip() + ) + return + try: + return tuple(map(int, out.split(b' '))) + except IndexError: + log.warning(u'Could not understand IM output: {0!r}', out) + + +BACKEND_GET_SIZE = { + PIL: pil_getsize, + IMAGEMAGICK: im_getsize, +} + + class Shareable(type): """A pseudo-singleton metaclass that allows both shared and non-shared instances. The ``MyClass.shared`` property holds a lazily-created shared instance of ``MyClass`` while calling ``MyClass()`` to construct a new object works as usual. 
""" - def __init__(cls, name, bases, dict): - super(Shareable, cls).__init__(name, bases, dict) - cls._instance = None + def __init__(self, name, bases, dict): + super(Shareable, self).__init__(name, bases, dict) + self._instance = None @property - def shared(cls): - if cls._instance is None: - cls._instance = cls() - return cls._instance + def shared(self): + if self._instance is None: + self._instance = self() + return self._instance -class ArtResizer(object): +class ArtResizer(six.with_metaclass(Shareable, object)): """A singleton class that performs image resizes. """ - __metaclass__ = Shareable - def __init__(self, method=None): - """Create a resizer object for the given method or, if none is - specified, with an inferred method. + def __init__(self): + """Create a resizer object with an inferred method. """ - self.method = self._check_method(method) - log.debug(u"artresizer: method is {0}".format(self.method)) + self.method = self._check_method() + log.debug(u"artresizer: method is {0}", self.method) self.can_compare = self._can_compare() def resize(self, maxwidth, path_in, path_out=None): @@ -165,47 +202,61 @@ class ArtResizer(object): """ return self.method[0] in BACKEND_FUNCS + def get_size(self, path_in): + """Return the size of an image file as an int couple (width, height) + in pixels. + + Only available locally + """ + if self.local: + func = BACKEND_GET_SIZE[self.method[0]] + return func(path_in) + def _can_compare(self): """A boolean indicating whether image comparison is available""" return self.method[0] == IMAGEMAGICK and self.method[1] > (6, 8, 7) @staticmethod - def _check_method(method=None): - """A tuple indicating whether current method is available and its - version. If no method is given, it returns a supported one. 
- """ - # Guess available method - if not method: - for m in [IMAGEMAGICK, PIL]: - _, version = ArtResizer._check_method(m) - if version: - return (m, version) - return (WEBPROXY, (0)) + def _check_method(): + """Return a tuple indicating an available method and its version.""" + version = get_im_version() + if version: + return IMAGEMAGICK, version - if method == IMAGEMAGICK: + version = get_pil_version() + if version: + return PIL, version - # Try invoking ImageMagick's "convert". - try: - out = util.command_output(['identify', '--version']) + return WEBPROXY, (0) - if 'imagemagick' in out.lower(): - pattern = r".+ (\d+)\.(\d+)\.(\d+).*" - match = re.search(pattern, out) - if match: - return (IMAGEMAGICK, - (int(match.group(1)), - int(match.group(2)), - int(match.group(3)))) - return (IMAGEMAGICK, (0)) - except (subprocess.CalledProcessError, OSError): - return (IMAGEMAGICK, None) +def get_im_version(): + """Return Image Magick version or None if it is unavailable + Try invoking ImageMagick's "convert". + """ + try: + out = util.command_output(['convert', '--version']) - if method == PIL: - # Try importing PIL. 
- try: - __import__('PIL', fromlist=['Image']) - return (PIL, (0)) - except ImportError: - return (PIL, None) + if b'imagemagick' in out.lower(): + pattern = br".+ (\d+)\.(\d+)\.(\d+).*" + match = re.search(pattern, out) + if match: + return (int(match.group(1)), + int(match.group(2)), + int(match.group(3))) + return (0,) + + except (subprocess.CalledProcessError, OSError) as exc: + log.debug(u'ImageMagick check `convert --version` failed: {}', exc) + return None + + +def get_pil_version(): + """Return Image Magick version or None if it is unavailable + Try importing PIL.""" + try: + __import__('PIL', fromlist=[str('Image')]) + return (0,) + except ImportError: + return None diff --git a/lib/beets/util/bluelet.py b/lib/beets/util/bluelet.py old mode 100644 new mode 100755 index a12ec945..48dd7bd9 --- a/lib/beets/util/bluelet.py +++ b/lib/beets/util/bluelet.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + """Extremely simple pure-Python implementation of coroutine-style asynchronous socket I/O. Inspired by, but inferior to, Eventlet. Bluelet can also be thought of as a less-terrible replacement for @@ -5,6 +7,9 @@ asyncore. Bluelet: easy concurrency without all the messy parallelism. """ +from __future__ import division, absolute_import, print_function + +import six import socket import select import sys @@ -15,20 +20,6 @@ import time import collections -# A little bit of "six" (Python 2/3 compatibility): cope with PEP 3109 syntax -# changes. - -PY3 = sys.version_info[0] == 3 -if PY3: - def _reraise(typ, exc, tb): - raise exc.with_traceback(tb) -else: - exec(""" -def _reraise(typ, exc, tb): - raise typ, exc, tb -""") - - # Basic events used for thread scheduling. 
class Event(object): @@ -210,7 +201,7 @@ class ThreadException(Exception): self.exc_info = exc_info def reraise(self): - _reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) + six.reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) SUSPENDED = Event() # Special sentinel placeholder for suspended threads. @@ -550,7 +541,7 @@ def spawn(coro): and child coroutines run concurrently. """ if not isinstance(coro, types.GeneratorType): - raise ValueError('%s is not a coroutine' % str(coro)) + raise ValueError(u'%s is not a coroutine' % coro) return SpawnEvent(coro) @@ -560,7 +551,7 @@ def call(coro): returns a value using end(), then this event returns that value. """ if not isinstance(coro, types.GeneratorType): - raise ValueError('%s is not a coroutine' % str(coro)) + raise ValueError(u'%s is not a coroutine' % coro) return DelegationEvent(coro) diff --git a/lib/beets/util/confit.py b/lib/beets/util/confit.py old mode 100644 new mode 100755 index de22e0ad..373e05ff --- a/lib/beets/util/confit.py +++ b/lib/beets/util/confit.py @@ -1,5 +1,6 @@ -# This file is part of Confit. -# Copyright 2014, Adrian Sampson. +# -*- coding: utf-8 -*- +# This file is part of Confuse. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,19 +15,16 @@ """Worry-free YAML configuration files. 
""" -from __future__ import unicode_literals +from __future__ import division, absolute_import, print_function + import platform import os import pkgutil import sys import yaml -import types import collections import re -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict UNIX_DIR_VAR = 'XDG_CONFIG_HOME' UNIX_DIR_FALLBACK = '~/.config' @@ -40,14 +38,15 @@ ROOT_NAME = 'root' YAML_TAB_PROBLEM = "found character '\\t' that cannot start any token" +REDACTED_TOMBSTONE = 'REDACTED' + # Utilities. PY3 = sys.version_info[0] == 3 -STRING = str if PY3 else unicode -BASESTRING = str if PY3 else basestring -NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) -TYPE_TYPES = (type,) if PY3 else (type, types.ClassType) +STRING = str if PY3 else unicode # noqa: F821 +BASESTRING = str if PY3 else basestring # noqa: F821 +NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) # noqa: F821 def iter_first(sequence): @@ -56,10 +55,7 @@ def iter_first(sequence): """ it = iter(sequence) try: - if PY3: - return next(it) - else: - return it.next() + return next(it) except StopIteration: raise ValueError() @@ -96,17 +92,17 @@ class ConfigReadError(ConfigError): self.filename = filename self.reason = reason - message = 'file {0} could not be read'.format(filename) + message = u'file {0} could not be read'.format(filename) if isinstance(reason, yaml.scanner.ScannerError) and \ reason.problem == YAML_TAB_PROBLEM: # Special-case error message for tab indentation in YAML markup. - message += ': found tab character at line {0}, column {1}'.format( + message += u': found tab character at line {0}, column {1}'.format( reason.problem_mark.line + 1, reason.problem_mark.column + 1, ) elif reason: # Generic error message uses exception's message. 
- message += ': {0}'.format(reason) + message += u': {0}'.format(reason) super(ConfigReadError, self).__init__(message) @@ -120,19 +116,19 @@ class ConfigSource(dict): def __init__(self, value, filename=None, default=False): super(ConfigSource, self).__init__(value) if filename is not None and not isinstance(filename, BASESTRING): - raise TypeError('filename must be a string or None') + raise TypeError(u'filename must be a string or None') self.filename = filename self.default = default def __repr__(self): - return 'ConfigSource({0}, {1}, {2})'.format( - super(ConfigSource, self).__repr__(), - repr(self.filename), - repr(self.default) + return 'ConfigSource({0!r}, {1!r}, {2!r})'.format( + super(ConfigSource, self), + self.filename, + self.default, ) @classmethod - def of(self, value): + def of(cls, value): """Given either a dictionary or a `ConfigSource` object, return a `ConfigSource` object. This lets a function accept either type of object as an argument. @@ -142,7 +138,7 @@ class ConfigSource(dict): elif isinstance(value, dict): return ConfigSource(value) else: - raise TypeError('source value must be a dict') + raise TypeError(u'source value must be a dict') class ConfigView(object): @@ -177,7 +173,7 @@ class ConfigView(object): try: return iter_first(pairs) except ValueError: - raise NotFoundError("{0} not found".format(self.name)) + raise NotFoundError(u"{0} not found".format(self.name)) def exists(self): """Determine whether the view has a setting in any source. @@ -208,7 +204,31 @@ class ConfigView(object): raise NotImplementedError def __repr__(self): - return '' % self.name + return '<{}: {}>'.format(self.__class__.__name__, self.name) + + def __iter__(self): + """Iterate over the keys of a dictionary view or the *subviews* + of a list view. + """ + # Try getting the keys, if this is a dictionary view. + try: + keys = self.keys() + for key in keys: + yield key + + except ConfigTypeError: + # Otherwise, try iterating over a list. 
+ collection = self.get() + if not isinstance(collection, (list, tuple)): + raise ConfigTypeError( + u'{0} must be a dictionary or a list, not {1}'.format( + self.name, type(collection).__name__ + ) + ) + + # Yield all the indices in the list. + for index in range(len(collection)): + yield self[index] def __getitem__(self, key): """Get a subview of this view.""" @@ -225,10 +245,15 @@ class ConfigView(object): def set_args(self, namespace): """Overlay parsed command-line arguments, generated by a library - like argparse or optparse, onto this view's value. + like argparse or optparse, onto this view's value. ``namespace`` + can be a ``dict`` or namespace object. """ args = {} - for key, value in namespace.__dict__.items(): + if isinstance(namespace, dict): + items = namespace.items() + else: + items = namespace.__dict__.items() + for key, value in items: if value is not None: # Avoid unset options. args[key] = value self.set(args) @@ -239,14 +264,17 @@ class ConfigView(object): # just say ``bool(view)`` or use ``view`` in a conditional. def __str__(self): - """Gets the value for this view as a byte string.""" - return str(self.get()) + """Get the value for this view as a bytestring. + """ + if PY3: + return self.__unicode__() + else: + return bytes(self.get()) def __unicode__(self): - """Gets the value for this view as a unicode string. (Python 2 - only.) + """Get the value for this view as a Unicode string. """ - return unicode(self.get()) + return STRING(self.get()) def __nonzero__(self): """Gets the value for this view as a boolean. (Python 2 only.) 
@@ -276,7 +304,7 @@ class ConfigView(object): cur_keys = dic.keys() except AttributeError: raise ConfigTypeError( - '{0} must be a dict, not {1}'.format( + u'{0} must be a dict, not {1}'.format( self.name, type(dic).__name__ ) ) @@ -317,7 +345,7 @@ class ConfigView(object): it = iter(collection) except TypeError: raise ConfigTypeError( - '{0} must be an iterable, not {1}'.format( + u'{0} must be an iterable, not {1}'.format( self.name, type(collection).__name__ ) ) @@ -326,17 +354,23 @@ class ConfigView(object): # Validation and conversion. - def flatten(self): + def flatten(self, redact=False): """Create a hierarchy of OrderedDicts containing the data from this view, recursively reifying all views to get their represented values. + + If `redact` is set, then sensitive values are replaced with + the string "REDACTED". """ od = OrderedDict() for key, view in self.items(): - try: - od[key] = view.flatten() - except ConfigTypeError: - od[key] = view.get() + if redact and view.redact: + od[key] = REDACTED_TOMBSTONE + else: + try: + od[key] = view.flatten(redact=redact) + except ConfigTypeError: + od[key] = view.get() return od def get(self, template=None): @@ -354,19 +388,60 @@ class ConfigView(object): """ return as_template(template).value(self, template) - # Old validation methods (deprecated). + # Shortcuts for common templates. def as_filename(self): + """Get the value as a path. Equivalent to `get(Filename())`. + """ return self.get(Filename()) def as_choice(self, choices): + """Get the value from a list of choices. Equivalent to + `get(Choice(choices))`. + """ return self.get(Choice(choices)) def as_number(self): + """Get the value as any number type: int or float. Equivalent to + `get(Number())`. + """ return self.get(Number()) - def as_str_seq(self): - return self.get(StrSeq()) + def as_str_seq(self, split=True): + """Get the value as a sequence of strings. Equivalent to + `get(StrSeq())`. 
+ """ + return self.get(StrSeq(split=split)) + + def as_str(self): + """Get the value as a (Unicode) string. Equivalent to + `get(unicode)` on Python 2 and `get(str)` on Python 3. + """ + return self.get(String()) + + # Redaction. + + @property + def redact(self): + """Whether the view contains sensitive information and should be + redacted from output. + """ + return () in self.get_redactions() + + @redact.setter + def redact(self, flag): + self.set_redaction((), flag) + + def set_redaction(self, path, flag): + """Add or remove a redaction for a key path, which should be an + iterable of keys. + """ + raise NotImplementedError() + + def get_redactions(self): + """Get the set of currently-redacted sub-key-paths at this view. + """ + raise NotImplementedError() class RootView(ConfigView): @@ -380,6 +455,7 @@ class RootView(ConfigView): """ self.sources = list(sources) self.name = ROOT_NAME + self.redactions = set() def add(self, obj): self.sources.append(ConfigSource.of(obj)) @@ -391,12 +467,24 @@ class RootView(ConfigView): return ((dict(s), s) for s in self.sources) def clear(self): - """Remove all sources from this configuration.""" + """Remove all sources (and redactions) from this + configuration. + """ del self.sources[:] + self.redactions.clear() def root(self): return self + def set_redaction(self, path, flag): + if flag: + self.redactions.add(path) + elif path in self.redactions: + self.redactions.remove(path) + + def get_redactions(self): + return self.redactions + class Subview(ConfigView): """A subview accessed via a subscript of a parent view.""" @@ -414,11 +502,13 @@ class Subview(ConfigView): if not isinstance(self.key, int): self.name += '.' 
if isinstance(self.key, int): - self.name += '#{0}'.format(self.key) - elif isinstance(self.key, BASESTRING): - self.name += '{0}'.format(self.key) + self.name += u'#{0}'.format(self.key) + elif isinstance(self.key, bytes): + self.name += self.key.decode('utf-8') + elif isinstance(self.key, STRING): + self.name += self.key else: - self.name += '{0}'.format(repr(self.key)) + self.name += repr(self.key) def resolve(self): for collection, source in self.parent.resolve(): @@ -433,7 +523,7 @@ class Subview(ConfigView): except TypeError: # Not subscriptable. raise ConfigTypeError( - "{0} must be a collection, not {1}".format( + u"{0} must be a collection, not {1}".format( self.parent.name, type(collection).__name__ ) ) @@ -448,6 +538,13 @@ class Subview(ConfigView): def root(self): return self.parent.root() + def set_redaction(self, path, flag): + self.parent.set_redaction((self.key,) + path, flag) + + def get_redactions(self): + return (kp[1:] for kp in self.parent.get_redactions() + if kp and kp[0] == self.key) + # Config file paths, including platform-specific paths and in-package # defaults. 
@@ -536,7 +633,7 @@ class Loader(yaml.SafeLoader): else: raise yaml.constructor.ConstructorError( None, None, - 'expected a mapping node, but found %s' % node.id, + u'expected a mapping node, but found %s' % node.id, node.start_mark ) @@ -547,7 +644,7 @@ class Loader(yaml.SafeLoader): hash(key) except TypeError as exc: raise yaml.constructor.ConstructorError( - 'while constructing a mapping', + u'while constructing a mapping', node.start_mark, 'found unacceptable key (%s)' % exc, key_node.start_mark ) @@ -595,11 +692,11 @@ class Dumper(yaml.SafeDumper): for item_key, item_value in mapping: node_key = self.represent_data(item_key) node_value = self.represent_data(item_value) - if not (isinstance(node_key, yaml.ScalarNode) - and not node_key.style): + if not (isinstance(node_key, yaml.ScalarNode) and + not node_key.style): best_style = False - if not (isinstance(node_value, yaml.ScalarNode) - and not node_value.style): + if not (isinstance(node_value, yaml.ScalarNode) and + not node_value.style): best_style = False value.append((node_key, node_value)) if flow_style is None: @@ -625,9 +722,9 @@ class Dumper(yaml.SafeDumper): """Represent bool as 'yes' or 'no' instead of 'true' or 'false'. """ if data: - value = 'yes' + value = u'yes' else: - value = 'no' + value = u'no' return self.represent_scalar('tag:yaml.org,2002:bool', value) def represent_none(self, data): @@ -752,7 +849,7 @@ class Configuration(RootView): appdir = os.environ[self._env_var] appdir = os.path.abspath(os.path.expanduser(appdir)) if os.path.isfile(appdir): - raise ConfigError('{0} must be a directory'.format( + raise ConfigError(u'{0} must be a directory'.format( self._env_var )) @@ -776,7 +873,7 @@ class Configuration(RootView): filename = os.path.abspath(filename) self.set(ConfigSource(load_yaml(filename), filename)) - def dump(self, full=True): + def dump(self, full=True, redact=False): """Dump the Configuration object to a YAML file. 
The order of the keys is determined from the default @@ -788,13 +885,17 @@ class Configuration(RootView): :type filename: unicode :param full: Dump settings that don't differ from the defaults as well + :param redact: Remove sensitive information (views with the `redact` + flag set) from the output """ if full: - out_dict = self.flatten() + out_dict = self.flatten(redact=redact) else: # Exclude defaults when flattening. sources = [s for s in self.sources if not s.default] - out_dict = RootView(sources).flatten() + temp_root = RootView(sources) + temp_root.redactions = self.redactions + out_dict = temp_root.flatten(redact=redact) yaml_out = yaml.dump(out_dict, Dumper=Dumper, default_flow_style=None, indent=4, @@ -806,7 +907,7 @@ class Configuration(RootView): if source.default: default_source = source break - if default_source: + if default_source and default_source.filename: with open(default_source.filename, 'r') as fp: default_data = fp.read() yaml_out = restore_yaml_comments(yaml_out, default_data) @@ -853,7 +954,7 @@ class LazyConfig(Configuration): def clear(self): """Remove all sources from this configuration.""" - del self.sources[:] + super(LazyConfig, self).clear() self._lazy_suffix = [] self._lazy_prefix = [] @@ -870,7 +971,7 @@ should be raised when the value is missing. class Template(object): """A value template for configuration fields. - The template works like a type and instructs Confit about how to + The template works like a type and instructs Confuse about how to interpret a deserialized YAML value. This includes type conversions, providing a default value, and validating for errors. For example, a filepath type might expand tildes and check that the file exists. @@ -901,7 +1002,7 @@ class Template(object): return self.convert(value, view) elif self.default is REQUIRED: # Missing required value. This is an error. 
- raise NotFoundError("{0} not found".format(view.name)) + raise NotFoundError(u"{0} not found".format(view.name)) else: # Missing value, but not required. return self.default @@ -926,7 +1027,7 @@ class Template(object): """ exc_class = ConfigTypeError if type_error else ConfigValueError raise exc_class( - '{0}: {1}'.format(view.name, message) + u'{0}: {1}'.format(view.name, message) ) def __repr__(self): @@ -947,7 +1048,7 @@ class Integer(Template): elif isinstance(value, float): return int(value) else: - self.fail('must be a number', view, True) + self.fail(u'must be a number', view, True) class Number(Template): @@ -960,7 +1061,7 @@ class Number(Template): return value else: self.fail( - 'must be numeric, not {0}'.format(type(value).__name__), + u'must be numeric, not {0}'.format(type(value).__name__), view, True ) @@ -1005,18 +1106,29 @@ class String(Template): if pattern: self.regex = re.compile(pattern) + def __repr__(self): + args = [] + + if self.default is not REQUIRED: + args.append(repr(self.default)) + + if self.pattern is not None: + args.append('pattern=' + repr(self.pattern)) + + return 'String({0})'.format(', '.join(args)) + def convert(self, value, view): """Check that the value is a string and matches the pattern. 
""" if isinstance(value, BASESTRING): if self.pattern and not self.regex.match(value): self.fail( - "must match the pattern {0}".format(self.pattern), + u"must match the pattern {0}".format(self.pattern), view ) return value else: - self.fail('must be a string', view, True) + self.fail(u'must be a string', view, True) class Choice(Template): @@ -1037,7 +1149,7 @@ class Choice(Template): """ if value not in self.choices: self.fail( - 'must be one of {0}, not {1}'.format( + u'must be one of {0}, not {1}'.format( repr(list(self.choices)), repr(value) ), view @@ -1052,6 +1164,67 @@ class Choice(Template): return 'Choice({0!r})'.format(self.choices) +class OneOf(Template): + """A template that permits values complying to one of the given templates. + """ + def __init__(self, allowed, default=REQUIRED): + super(OneOf, self).__init__(default) + self.allowed = list(allowed) + + def __repr__(self): + args = [] + + if self.allowed is not None: + args.append('allowed=' + repr(self.allowed)) + + if self.default is not REQUIRED: + args.append(repr(self.default)) + + return 'OneOf({0})'.format(', '.join(args)) + + def value(self, view, template): + self.template = template + return super(OneOf, self).value(view, template) + + def convert(self, value, view): + """Ensure that the value follows at least one template. 
+ """ + is_mapping = isinstance(self.template, MappingTemplate) + + for candidate in self.allowed: + try: + if is_mapping: + if isinstance(candidate, Filename) and \ + candidate.relative_to: + next_template = candidate.template_with_relatives( + view, + self.template + ) + + next_template.subtemplates[view.key] = as_template( + candidate + ) + else: + next_template = MappingTemplate({view.key: candidate}) + + return view.parent.get(next_template)[view.key] + else: + return view.get(candidate) + except ConfigTemplateError: + raise + except ConfigError: + pass + except ValueError as exc: + raise ConfigTemplateError(exc) + + self.fail( + u'must be one of {0}, not {1}'.format( + repr(self.allowed), repr(value) + ), + view + ) + + class StrSeq(Template): """A template for values that are lists of strings. @@ -1070,7 +1243,7 @@ class StrSeq(Template): def convert(self, value, view): if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') + value = value.decode('utf-8', 'ignore') if isinstance(value, STRING): if self.split: @@ -1081,17 +1254,17 @@ class StrSeq(Template): try: value = list(value) except TypeError: - self.fail('must be a whitespace-separated string or a list', + self.fail(u'must be a whitespace-separated string or a list', view, True) def convert(x): - if isinstance(x, unicode): + if isinstance(x, STRING): return x - elif isinstance(x, BASESTRING): - return x.decode('utf8', 'ignore') + elif isinstance(x, bytes): + return x.decode('utf-8', 'ignore') else: - self.fail('must be a list of strings', view, True) - return map(convert, value) + self.fail(u'must be a list of strings', view, True) + return list(map(convert, value)) class Filename(Template): @@ -1107,7 +1280,7 @@ class Filename(Template): """ def __init__(self, default=REQUIRED, cwd=None, relative_to=None, in_app_dir=False): - """ `relative_to` is the name of a sibling value that is + """`relative_to` is the name of a sibling value that is being validated at the same time. 
`in_app_dir` indicates whether the path should be resolved @@ -1140,19 +1313,19 @@ class Filename(Template): if not isinstance(template, (collections.Mapping, MappingTemplate)): # disallow config.get(Filename(relative_to='foo')) raise ConfigTemplateError( - 'relative_to may only be used when getting multiple values.' + u'relative_to may only be used when getting multiple values.' ) elif self.relative_to == view.key: raise ConfigTemplateError( - '{0} is relative to itself'.format(view.name) + u'{0} is relative to itself'.format(view.name) ) elif self.relative_to not in view.parent.keys(): # self.relative_to is not in the config self.fail( ( - 'needs sibling value "{0}" to expand relative path' + u'needs sibling value "{0}" to expand relative path' ).format(self.relative_to), view ) @@ -1174,12 +1347,12 @@ class Filename(Template): if next_relative in template.subtemplates: # we encountered this config key previously raise ConfigTemplateError(( - '{0} and {1} are recursively relative' + u'{0} and {1} are recursively relative' ).format(view.name, self.relative_to)) else: raise ConfigTemplateError(( - 'missing template for {0}, needed to expand {1}\'s' + - 'relative path' + u'missing template for {0}, needed to expand {1}\'s' + + u'relative path' ).format(self.relative_to, view.name)) next_template.subtemplates[next_relative] = rel_to_template @@ -1191,7 +1364,7 @@ class Filename(Template): path, source = view.first() if not isinstance(path, BASESTRING): self.fail( - 'must be a filename, not {0}'.format(type(path).__name__), + u'must be a filename, not {0}'.format(type(path).__name__), view, True ) @@ -1229,7 +1402,7 @@ class TypeTemplate(Template): def convert(self, value, view): if not isinstance(value, self.typ): self.fail( - 'must be a {0}, not {1}'.format( + u'must be a {0}, not {1}'.format( self.typ.__name__, type(value).__name__, ), @@ -1267,6 +1440,11 @@ def as_template(value): return String() elif isinstance(value, BASESTRING): return String(value) + elif 
isinstance(value, set): + # convert to list to avoid hash related problems + return Choice(list(value)) + elif isinstance(value, list): + return OneOf(value) elif value is float: return Number() elif value is None: @@ -1278,4 +1456,4 @@ def as_template(value): elif isinstance(value, type): return TypeTemplate(value) else: - raise ValueError('cannot convert to template: {0!r}'.format(value)) + raise ValueError(u'cannot convert to template: {0!r}'.format(value)) diff --git a/lib/beets/util/enumeration.py b/lib/beets/util/enumeration.py old mode 100644 new mode 100755 index e8cd0fe1..3e946718 --- a/lib/beets/util/enumeration.py +++ b/lib/beets/util/enumeration.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -12,6 +13,8 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. +from __future__ import division, absolute_import, print_function + from enum import Enum diff --git a/lib/beets/util/functemplate.py b/lib/beets/util/functemplate.py old mode 100644 new mode 100755 index 03e57c61..51716552 --- a/lib/beets/util/functemplate.py +++ b/lib/beets/util/functemplate.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -25,12 +26,15 @@ library: unknown symbols are left intact. This is sort of like a tiny, horrible degeneration of a real templating engine like Jinja2 or Mustache. 
""" -from __future__ import print_function + +from __future__ import division, absolute_import, print_function import re import ast import dis import types +import sys +import six SYMBOL_DELIM = u'$' FUNC_DELIM = u'%' @@ -70,13 +74,13 @@ def ex_literal(val): """ if val is None: return ast.Name('None', ast.Load()) - elif isinstance(val, (int, float, long)): + elif isinstance(val, six.integer_types): return ast.Num(val) elif isinstance(val, bool): - return ast.Name(str(val), ast.Load()) - elif isinstance(val, basestring): + return ast.Name(bytes(val), ast.Load()) + elif isinstance(val, six.string_types): return ast.Str(val) - raise TypeError('no literal for {0}'.format(type(val))) + raise TypeError(u'no literal for {0}'.format(type(val))) def ex_varassign(name, expr): @@ -93,7 +97,7 @@ def ex_call(func, args): function may be an expression or the name of a function. Each argument may be an expression or a value to be used as a literal. """ - if isinstance(func, basestring): + if isinstance(func, six.string_types): func = ex_rvalue(func) args = list(args) @@ -101,7 +105,10 @@ def ex_call(func, args): if not isinstance(args[i], ast.expr): args[i] = ex_literal(args[i]) - return ast.Call(func, args, [], None, None) + if sys.version_info[:2] < (3, 5): + return ast.Call(func, args, [], None, None) + else: + return ast.Call(func, args, []) def compile_func(arg_names, statements, name='_the_func', debug=False): @@ -109,16 +116,31 @@ def compile_func(arg_names, statements, name='_the_func', debug=False): the resulting Python function. If `debug`, then print out the bytecode of the compiled function. 
""" - func_def = ast.FunctionDef( - name, - ast.arguments( - [ast.Name(n, ast.Param()) for n in arg_names], - None, None, - [ex_literal(None) for _ in arg_names], - ), - statements, - [], - ) + if six.PY2: + func_def = ast.FunctionDef( + name=name.encode('utf-8'), + args=ast.arguments( + args=[ast.Name(n, ast.Param()) for n in arg_names], + vararg=None, + kwarg=None, + defaults=[ex_literal(None) for _ in arg_names], + ), + body=statements, + decorator_list=[], + ) + else: + func_def = ast.FunctionDef( + name=name, + args=ast.arguments( + args=[ast.arg(arg=n, annotation=None) for n in arg_names], + kwonlyargs=[], + kw_defaults=[], + defaults=[ex_literal(None) for _ in arg_names], + ), + body=statements, + decorator_list=[], + ) + mod = ast.Module([func_def]) ast.fix_missing_locations(mod) @@ -132,7 +154,7 @@ def compile_func(arg_names, statements, name='_the_func', debug=False): dis.dis(const) the_locals = {} - exec prog in {}, the_locals + exec(prog, {}, the_locals) return the_locals[name] @@ -160,8 +182,12 @@ class Symbol(object): def translate(self): """Compile the variable lookup.""" - expr = ex_rvalue(VARIABLE_PREFIX + self.ident.encode('utf8')) - return [expr], set([self.ident.encode('utf8')]), set() + if six.PY2: + ident = self.ident.encode('utf-8') + else: + ident = self.ident + expr = ex_rvalue(VARIABLE_PREFIX + ident) + return [expr], set([ident]), set() class Call(object): @@ -186,15 +212,19 @@ class Call(object): except Exception as exc: # Function raised exception! Maybe inlining the name of # the exception will help debug. 
- return u'<%s>' % unicode(exc) - return unicode(out) + return u'<%s>' % six.text_type(exc) + return six.text_type(out) else: return self.original def translate(self): """Compile the function call.""" varnames = set() - funcnames = set([self.ident.encode('utf8')]) + if six.PY2: + ident = self.ident.encode('utf-8') + else: + ident = self.ident + funcnames = set([ident]) arg_exprs = [] for arg in self.args: @@ -209,14 +239,14 @@ class Call(object): [ex_call( 'map', [ - ex_rvalue('unicode'), + ex_rvalue(six.text_type.__name__), ast.List(subexprs, ast.Load()), ] )], )) subexpr_call = ex_call( - FUNCTION_PREFIX + self.ident.encode('utf8'), + FUNCTION_PREFIX + ident, arg_exprs ) return [subexpr_call], varnames, funcnames @@ -238,11 +268,11 @@ class Expression(object): """ out = [] for part in self.parts: - if isinstance(part, basestring): + if isinstance(part, six.string_types): out.append(part) else: out.append(part.evaluate(env)) - return u''.join(map(unicode, out)) + return u''.join(map(six.text_type, out)) def translate(self): """Compile the expression to a list of Python AST expressions, a @@ -252,7 +282,7 @@ class Expression(object): varnames = set() funcnames = set() for part in self.parts: - if isinstance(part, basestring): + if isinstance(part, six.string_types): expressions.append(ex_literal(part)) else: e, v, f = part.translate() @@ -281,16 +311,24 @@ class Parser(object): replaced with a real, accepted parsing technique (PEG, parser generator, etc.). """ - def __init__(self, string): + def __init__(self, string, in_argument=False): + """ Create a new parser. + :param in_arguments: boolean that indicates the parser is to be + used for parsing function arguments, ie. considering commas + (`ARG_SEP`) a special character + """ self.string = string + self.in_argument = in_argument self.pos = 0 self.parts = [] # Common parsing resources. 
special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, - ARG_SEP, ESCAPE_CHAR) - special_char_re = re.compile(ur'[%s]|$' % + ESCAPE_CHAR) + special_char_re = re.compile(r'[%s]|$' % u''.join(re.escape(c) for c in special_chars)) + escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) + terminator_chars = (GROUP_CLOSE,) def parse_expression(self): """Parse a template expression starting at ``pos``. Resulting @@ -298,17 +336,27 @@ class Parser(object): the ``parts`` field, a list. The ``pos`` field is updated to be the next character after the expression. """ + # Append comma (ARG_SEP) to the list of special characters only when + # parsing function arguments. + extra_special_chars = () + special_char_re = self.special_char_re + if self.in_argument: + extra_special_chars = (ARG_SEP,) + special_char_re = re.compile( + r'[%s]|$' % u''.join(re.escape(c) for c in + self.special_chars + extra_special_chars)) + text_parts = [] while self.pos < len(self.string): char = self.string[self.pos] - if char not in self.special_chars: + if char not in self.special_chars + extra_special_chars: # A non-special character. Skip to the next special # character, treating the interstice as literal text. next_pos = ( - self.special_char_re.search(self.string[self.pos:]).start() - + self.pos + special_char_re.search( + self.string[self.pos:]).start() + self.pos ) text_parts.append(self.string[self.pos:next_pos]) self.pos = next_pos @@ -318,14 +366,14 @@ class Parser(object): # The last character can never begin a structure, so we # just interpret it as a literal character (unless it # terminates the expression, as with , and }). 
- if char not in (GROUP_CLOSE, ARG_SEP): + if char not in self.terminator_chars + extra_special_chars: text_parts.append(char) self.pos += 1 break next_char = self.string[self.pos + 1] - if char == ESCAPE_CHAR and next_char in \ - (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP): + if char == ESCAPE_CHAR and next_char in (self.escapable_chars + + extra_special_chars): # An escaped special character ($$, $}, etc.). Note that # ${ is not an escape sequence: this is ambiguous with # the start of a symbol and it's not necessary (just @@ -345,7 +393,7 @@ class Parser(object): elif char == FUNC_DELIM: # Parse a function call. self.parse_call() - elif char in (GROUP_CLOSE, ARG_SEP): + elif char in self.terminator_chars + extra_special_chars: # Template terminated. break elif char == GROUP_OPEN: @@ -453,7 +501,7 @@ class Parser(object): expressions = [] while self.pos < len(self.string): - subparser = Parser(self.string[self.pos:]) + subparser = Parser(self.string[self.pos:], in_argument=True) subparser.parse_expression() # Extract and advance past the parsed expression. @@ -477,7 +525,7 @@ class Parser(object): Updates ``pos``. """ remainder = self.string[self.pos:] - ident = re.match(ur'\w*', remainder).group(0) + ident = re.match(r'\w*', remainder).group(0) self.pos += len(ident) return ident @@ -524,6 +572,7 @@ class Template(object): res = self.compiled(values, functions) except: # Handle any exceptions thrown by compiled version. 
res = self.interpret(values, functions) + return res def translate(self): @@ -532,9 +581,9 @@ class Template(object): argnames = [] for varname in varnames: - argnames.append(VARIABLE_PREFIX.encode('utf8') + varname) + argnames.append(VARIABLE_PREFIX + varname) for funcname in funcnames: - argnames.append(FUNCTION_PREFIX.encode('utf8') + funcname) + argnames.append(FUNCTION_PREFIX + funcname) func = compile_func( argnames, @@ -559,7 +608,7 @@ if __name__ == '__main__': import timeit _tmpl = Template(u'foo $bar %baz{foozle $bar barzle} $bar') _vars = {'bar': 'qux'} - _funcs = {'baz': unicode.upper} + _funcs = {'baz': six.text_type.upper} interp_time = timeit.timeit('_tmpl.interpret(_vars, _funcs)', 'from __main__ import _tmpl, _vars, _funcs', number=10000) @@ -568,4 +617,4 @@ if __name__ == '__main__': 'from __main__ import _tmpl, _vars, _funcs', number=10000) print(comp_time) - print('Speedup:', interp_time / comp_time) + print(u'Speedup:', interp_time / comp_time) diff --git a/lib/beets/util/hidden.py b/lib/beets/util/hidden.py new file mode 100755 index 00000000..ed97f2bf --- /dev/null +++ b/lib/beets/util/hidden.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""Simple library to work out if a file is hidden on different platforms.""" +from __future__ import division, absolute_import, print_function + +import os +import stat +import ctypes +import sys +import beets.util + + +def _is_hidden_osx(path): + """Return whether or not a file is hidden on OS X. + + This uses os.lstat to work out if a file has the "hidden" flag. + """ + file_stat = os.lstat(beets.util.syspath(path)) + + if hasattr(file_stat, 'st_flags') and hasattr(stat, 'UF_HIDDEN'): + return bool(file_stat.st_flags & stat.UF_HIDDEN) + else: + return False + + +def _is_hidden_win(path): + """Return whether or not a file is hidden on Windows. + + This uses GetFileAttributes to work out if a file has the "hidden" flag + (FILE_ATTRIBUTE_HIDDEN). + """ + # FILE_ATTRIBUTE_HIDDEN = 2 (0x2) from GetFileAttributes documentation. + hidden_mask = 2 + + # Retrieve the attributes for the file. + attrs = ctypes.windll.kernel32.GetFileAttributesW(beets.util.syspath(path)) + + # Ensure we have valid attribues and compare them against the mask. + return attrs >= 0 and attrs & hidden_mask + + +def _is_hidden_dot(path): + """Return whether or not a file starts with a dot. + + Files starting with a dot are seen as "hidden" files on Unix-based OSes. + """ + return os.path.basename(path).startswith(b'.') + + +def is_hidden(path): + """Return whether or not a file is hidden. `path` should be a + bytestring filename. + + This method works differently depending on the platform it is called on. + + On OS X, it uses both the result of `is_hidden_osx` and `is_hidden_dot` to + work out if a file is hidden. + + On Windows, it uses the result of `is_hidden_win` to work out if a file is + hidden. + + On any other operating systems (i.e. Linux), it uses `is_hidden_dot` to + work out if a file is hidden. 
+ """ + # Run platform specific functions depending on the platform + if sys.platform == 'darwin': + return _is_hidden_osx(path) or _is_hidden_dot(path) + elif sys.platform == 'win32': + return _is_hidden_win(path) + else: + return _is_hidden_dot(path) + +__all__ = ['is_hidden'] diff --git a/lib/beets/util/pipeline.py b/lib/beets/util/pipeline.py old mode 100644 new mode 100755 index d267789c..367e5d98 --- a/lib/beets/util/pipeline.py +++ b/lib/beets/util/pipeline.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -30,11 +31,13 @@ up a bottleneck stage by dividing its work among multiple threads. To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. """ -from __future__ import print_function -import Queue +from __future__ import division, absolute_import, print_function + +from six.moves import queue from threading import Thread, Lock import sys +import six BUBBLE = '__PIPELINE_BUBBLE__' POISON = '__PIPELINE_POISON__' @@ -61,7 +64,17 @@ def _invalidate_queue(q, val=None, sync=True): q.mutex.acquire() try: - q.maxsize = 0 + # Originally, we set `maxsize` to 0 here, which is supposed to mean + # an unlimited queue size. However, there is a race condition since + # Python 3.2 when this attribute is changed while another thread is + # waiting in put()/get() due to a full/empty queue. + # Setting it to 2 is still hacky because Python does not give any + # guarantee what happens if Queue methods/attributes are overwritten + # when it is already in use. However, because of our dummy _put() + # and _get() methods, it provides a workaround to let the queue appear + # to be never empty or full. 
+ # See issue https://github.com/beetbox/beets/issues/2078 + q.maxsize = 2 q._qsize = _qsize q._put = _put q._get = _get @@ -73,13 +86,13 @@ def _invalidate_queue(q, val=None, sync=True): q.mutex.release() -class CountedQueue(Queue.Queue): +class CountedQueue(queue.Queue): """A queue that keeps track of the number of threads that are still feeding into it. The queue is poisoned when all threads are finished with the queue. """ def __init__(self, maxsize=0): - Queue.Queue.__init__(self, maxsize) + queue.Queue.__init__(self, maxsize) self.nthreads = 0 self.poisoned = False @@ -246,7 +259,7 @@ class FirstPipelineThread(PipelineThread): # Get the value from the generator. try: - msg = self.coro.next() + msg = next(self.coro) except StopIteration: break @@ -279,7 +292,7 @@ class MiddlePipelineThread(PipelineThread): def run(self): try: # Prime the coroutine. - self.coro.next() + next(self.coro) while True: with self.abort_lock: @@ -324,7 +337,7 @@ class LastPipelineThread(PipelineThread): def run(self): # Prime the coroutine. - self.coro.next() + next(self.coro) try: while True: @@ -359,7 +372,7 @@ class Pipeline(object): be at least two stages. """ if len(stages) < 2: - raise ValueError('pipeline must have at least two stages') + raise ValueError(u'pipeline must have at least two stages') self.stages = [] for stage in stages: if isinstance(stage, (list, tuple)): @@ -409,7 +422,7 @@ class Pipeline(object): try: # Using a timeout allows us to receive KeyboardInterrupt # exceptions during the join(). - while threads[-1].isAlive(): + while threads[-1].is_alive(): threads[-1].join(1) except: @@ -429,7 +442,7 @@ class Pipeline(object): exc_info = thread.exc_info if exc_info: # Make the exception appear as it was raised originally. - raise exc_info[0], exc_info[1], exc_info[2] + six.reraise(exc_info[0], exc_info[1], exc_info[2]) def pull(self): """Yield elements from the end of the pipeline. Runs the stages @@ -442,7 +455,7 @@ class Pipeline(object): # "Prime" the coroutines. 
for coro in coros[1:]: - coro.next() + next(coro) # Begin the pipeline. for out in coros[0]: @@ -464,14 +477,14 @@ if __name__ == '__main__': # in parallel. def produce(): for i in range(5): - print('generating %i' % i) + print(u'generating %i' % i) time.sleep(1) yield i def work(): num = yield while True: - print('processing %i' % num) + print(u'processing %i' % num) time.sleep(2) num = yield num * 2 @@ -479,7 +492,7 @@ if __name__ == '__main__': while True: num = yield time.sleep(1) - print('received %i' % num) + print(u'received %i' % num) ts_start = time.time() Pipeline([produce(), work(), consume()]).run_sequential() @@ -488,22 +501,22 @@ if __name__ == '__main__': ts_par = time.time() Pipeline([produce(), (work(), work()), consume()]).run_parallel() ts_end = time.time() - print('Sequential time:', ts_seq - ts_start) - print('Parallel time:', ts_par - ts_seq) - print('Multiply-parallel time:', ts_end - ts_par) + print(u'Sequential time:', ts_seq - ts_start) + print(u'Parallel time:', ts_par - ts_seq) + print(u'Multiply-parallel time:', ts_end - ts_par) print() # Test a pipeline that raises an exception. def exc_produce(): for i in range(10): - print('generating %i' % i) + print(u'generating %i' % i) time.sleep(1) yield i def exc_work(): num = yield while True: - print('processing %i' % num) + print(u'processing %i' % num) time.sleep(3) if num == 3: raise Exception() @@ -512,6 +525,6 @@ if __name__ == '__main__': def exc_consume(): while True: num = yield - print('received %i' % num) + print(u'received %i' % num) Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1) diff --git a/lib/beets/vfs.py b/lib/beets/vfs.py old mode 100644 new mode 100755 index e940e21f..7f9a049e --- a/lib/beets/vfs.py +++ b/lib/beets/vfs.py @@ -1,5 +1,6 @@ +# -*- coding: utf-8 -*- # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. 
# # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -15,6 +16,8 @@ """A simple utility for constructing filesystem-like trees from beets libraries. """ +from __future__ import division, absolute_import, print_function + from collections import namedtuple from beets import util diff --git a/lib/jellyfish/__init__.py b/lib/jellyfish/__init__.py new file mode 100644 index 00000000..78345699 --- /dev/null +++ b/lib/jellyfish/__init__.py @@ -0,0 +1,4 @@ +try: + from .cjellyfish import * # noqa +except ImportError: + from ._jellyfish import * # noqa diff --git a/lib/jellyfish/_jellyfish.py b/lib/jellyfish/_jellyfish.py new file mode 100644 index 00000000..f3a4c9b5 --- /dev/null +++ b/lib/jellyfish/_jellyfish.py @@ -0,0 +1,488 @@ +import unicodedata +from collections import defaultdict +from .compat import _range, _zip_longest, _no_bytes_err +from .porter import Stemmer + + +def _normalize(s): + return unicodedata.normalize('NFKD', s) + + +def levenshtein_distance(s1, s2): + if isinstance(s1, bytes) or isinstance(s2, bytes): + raise TypeError(_no_bytes_err) + + if s1 == s2: + return 0 + rows = len(s1)+1 + cols = len(s2)+1 + + if not s1: + return cols-1 + if not s2: + return rows-1 + + prev = None + cur = range(cols) + for r in _range(1, rows): + prev, cur = cur, [r] + [0]*(cols-1) + for c in _range(1, cols): + deletion = prev[c] + 1 + insertion = cur[c-1] + 1 + edit = prev[c-1] + (0 if s1[r-1] == s2[c-1] else 1) + cur[c] = min(edit, deletion, insertion) + + return cur[-1] + + +def _jaro_winkler(ying, yang, long_tolerance, winklerize): + if isinstance(ying, bytes) or isinstance(yang, bytes): + raise TypeError(_no_bytes_err) + + ying_len = len(ying) + yang_len = len(yang) + + if not ying_len or not yang_len: + return 0 + + min_len = max(ying_len, yang_len) + search_range = (min_len // 2) - 1 + if search_range < 0: + search_range = 0 + + ying_flags = [False]*ying_len + yang_flags = 
[False]*yang_len + + # looking only within search range, count & flag matched pairs + common_chars = 0 + for i, ying_ch in enumerate(ying): + low = i - search_range if i > search_range else 0 + hi = i + search_range if i + search_range < yang_len else yang_len - 1 + for j in _range(low, hi+1): + if not yang_flags[j] and yang[j] == ying_ch: + ying_flags[i] = yang_flags[j] = True + common_chars += 1 + break + + # short circuit if no characters match + if not common_chars: + return 0 + + # count transpositions + k = trans_count = 0 + for i, ying_f in enumerate(ying_flags): + if ying_f: + for j in _range(k, yang_len): + if yang_flags[j]: + k = j + 1 + break + if ying[i] != yang[j]: + trans_count += 1 + trans_count /= 2 + + # adjust for similarities in nonmatched characters + common_chars = float(common_chars) + weight = ((common_chars/ying_len + common_chars/yang_len + + (common_chars-trans_count) / common_chars)) / 3 + + # winkler modification: continue to boost if strings are similar + if winklerize and weight > 0.7 and ying_len > 3 and yang_len > 3: + # adjust for up to first 4 chars in common + j = min(min_len, 4) + i = 0 + while i < j and ying[i] == yang[i] and ying[i]: + i += 1 + if i: + weight += i * 0.1 * (1.0 - weight) + + # optionally adjust for long strings + # after agreeing beginning chars, at least two or more must agree and + # agreed characters must be > half of remaining characters + if (long_tolerance and min_len > 4 and common_chars > i+1 and + 2 * common_chars >= min_len + i): + weight += ((1.0 - weight) * (float(common_chars-i-1) / float(ying_len+yang_len-i*2+2))) + + return weight + + +def damerau_levenshtein_distance(s1, s2): + if isinstance(s1, bytes) or isinstance(s2, bytes): + raise TypeError(_no_bytes_err) + + len1 = len(s1) + len2 = len(s2) + infinite = len1 + len2 + + # character array + da = defaultdict(int) + + # distance matrix + score = [[0]*(len2+2) for x in _range(len1+2)] + + score[0][0] = infinite + for i in _range(0, len1+1): + 
score[i+1][0] = infinite + score[i+1][1] = i + for i in _range(0, len2+1): + score[0][i+1] = infinite + score[1][i+1] = i + + for i in _range(1, len1+1): + db = 0 + for j in _range(1, len2+1): + i1 = da[s2[j-1]] + j1 = db + cost = 1 + if s1[i-1] == s2[j-1]: + cost = 0 + db = j + + score[i+1][j+1] = min(score[i][j] + cost, + score[i+1][j] + 1, + score[i][j+1] + 1, + score[i1][j1] + (i-i1-1) + 1 + (j-j1-1)) + da[s1[i-1]] = i + + return score[len1+1][len2+1] + + +def jaro_distance(s1, s2): + return _jaro_winkler(s1, s2, False, False) + + +def jaro_winkler(s1, s2, long_tolerance=False): + return _jaro_winkler(s1, s2, long_tolerance, True) + + +def soundex(s): + if not s: + return s + if isinstance(s, bytes): + raise TypeError(_no_bytes_err) + + s = _normalize(s) + + replacements = (('bfpv', '1'), + ('cgjkqsxz', '2'), + ('dt', '3'), + ('l', '4'), + ('mn', '5'), + ('r', '6')) + result = [s[0]] + count = 1 + + # find would-be replacment for first character + for lset, sub in replacements: + if s[0].lower() in lset: + last = sub + break + else: + last = None + + for letter in s[1:]: + for lset, sub in replacements: + if letter.lower() in lset: + if sub != last: + result.append(sub) + count += 1 + last = sub + break + else: + last = None + if count == 4: + break + + result += '0'*(4-count) + return ''.join(result) + + +def hamming_distance(s1, s2): + if isinstance(s1, bytes) or isinstance(s2, bytes): + raise TypeError(_no_bytes_err) + + # ensure length of s1 >= s2 + if len(s2) > len(s1): + s1, s2 = s2, s1 + + # distance is difference in length + differing chars + distance = len(s1) - len(s2) + for i, c in enumerate(s2): + if c != s1[i]: + distance += 1 + + return distance + + +def nysiis(s): + if isinstance(s, bytes): + raise TypeError(_no_bytes_err) + if not s: + return '' + + s = s.upper() + key = [] + + # step 1 - prefixes + if s.startswith('MAC'): + s = 'MCC' + s[3:] + elif s.startswith('KN'): + s = s[1:] + elif s.startswith('K'): + s = 'C' + s[1:] + elif 
s.startswith(('PH', 'PF')): + s = 'FF' + s[2:] + elif s.startswith('SCH'): + s = 'SSS' + s[3:] + + # step 2 - suffixes + if s.endswith(('IE', 'EE')): + s = s[:-2] + 'Y' + elif s.endswith(('DT', 'RT', 'RD', 'NT', 'ND')): + s = s[:-2] + 'D' + + # step 3 - first character of key comes from name + key.append(s[0]) + + # step 4 - translate remaining chars + i = 1 + len_s = len(s) + while i < len_s: + ch = s[i] + if ch == 'E' and i+1 < len_s and s[i+1] == 'V': + ch = 'AF' + i += 1 + elif ch in 'AEIOU': + ch = 'A' + elif ch == 'Q': + ch = 'G' + elif ch == 'Z': + ch = 'S' + elif ch == 'M': + ch = 'N' + elif ch == 'K': + if i+1 < len(s) and s[i+1] == 'N': + ch = 'N' + else: + ch = 'C' + elif ch == 'S' and s[i+1:i+3] == 'CH': + ch = 'SS' + i += 2 + elif ch == 'P' and i+1 < len(s) and s[i+1] == 'H': + ch = 'F' + i += 1 + elif ch == 'H' and (s[i-1] not in 'AEIOU' or (i+1 < len(s) and s[i+1] not in 'AEIOU')): + if s[i-1] in 'AEIOU': + ch = 'A' + else: + ch = s[i-1] + elif ch == 'W' and s[i-1] in 'AEIOU': + ch = s[i-1] + + if ch[-1] != key[-1][-1]: + key.append(ch) + + i += 1 + + key = ''.join(key) + + # step 5 - remove trailing S + if key.endswith('S') and key != 'S': + key = key[:-1] + + # step 6 - replace AY w/ Y + if key.endswith('AY'): + key = key[:-2] + 'Y' + + # step 7 - remove trailing A + if key.endswith('A') and key != 'A': + key = key[:-1] + + # step 8 was already done + + return key + + +def match_rating_codex(s): + if isinstance(s, bytes): + raise TypeError(_no_bytes_err) + s = s.upper() + codex = [] + + prev = None + for i, c in enumerate(s): + # not a space OR + # starting character & vowel + # or consonant not preceded by same consonant + if (c != ' ' and (i == 0 and c in 'AEIOU') or (c not in 'AEIOU' and c != prev)): + codex.append(c) + + prev = c + + # just use first/last 3 + if len(codex) > 6: + return ''.join(codex[:3]+codex[-3:]) + else: + return ''.join(codex) + + +def match_rating_comparison(s1, s2): + codex1 = match_rating_codex(s1) + codex2 = 
match_rating_codex(s2) + len1 = len(codex1) + len2 = len(codex2) + res1 = [] + res2 = [] + + # length differs by 3 or more, no result + if abs(len1-len2) >= 3: + return None + + # get minimum rating based on sums of codexes + lensum = len1 + len2 + if lensum <= 4: + min_rating = 5 + elif lensum <= 7: + min_rating = 4 + elif lensum <= 11: + min_rating = 3 + else: + min_rating = 2 + + # strip off common prefixes + for c1, c2 in _zip_longest(codex1, codex2): + if c1 != c2: + if c1: + res1.append(c1) + if c2: + res2.append(c2) + + unmatched_count1 = unmatched_count2 = 0 + for c1, c2 in _zip_longest(reversed(res1), reversed(res2)): + if c1 != c2: + if c1: + unmatched_count1 += 1 + if c2: + unmatched_count2 += 1 + + return (6 - max(unmatched_count1, unmatched_count2)) >= min_rating + + +def metaphone(s): + if isinstance(s, bytes): + raise TypeError(_no_bytes_err) + + result = [] + + s = _normalize(s.lower()) + + # skip first character if s starts with these + if s.startswith(('kn', 'gn', 'pn', 'ac', 'wr', 'ae')): + s = s[1:] + + i = 0 + + while i < len(s): + c = s[i] + next = s[i+1] if i < len(s)-1 else '*****' + nextnext = s[i+2] if i < len(s)-2 else '*****' + + # skip doubles except for cc + if c == next and c != 'c': + i += 1 + continue + + if c in 'aeiou': + if i == 0 or s[i-1] == ' ': + result.append(c) + elif c == 'b': + if (not (i != 0 and s[i-1] == 'm')) or next: + result.append('b') + elif c == 'c': + if next == 'i' and nextnext == 'a' or next == 'h': + result.append('x') + i += 1 + elif next in 'iey': + result.append('s') + i += 1 + else: + result.append('k') + elif c == 'd': + if next == 'g' and nextnext in 'iey': + result.append('j') + i += 2 + else: + result.append('t') + elif c in 'fjlmnr': + result.append(c) + elif c == 'g': + if next in 'iey': + result.append('j') + elif next not in 'hn': + result.append('k') + elif next == 'h' and nextnext and nextnext not in 'aeiou': + i += 1 + elif c == 'h': + if i == 0 or next in 'aeiou' or s[i-1] not in 'aeiou': + 
result.append('h') + elif c == 'k': + if i == 0 or s[i-1] != 'c': + result.append('k') + elif c == 'p': + if next == 'h': + result.append('f') + i += 1 + else: + result.append('p') + elif c == 'q': + result.append('k') + elif c == 's': + if next == 'h': + result.append('x') + i += 1 + elif next == 'i' and nextnext in 'oa': + result.append('x') + i += 2 + else: + result.append('s') + elif c == 't': + if next == 'i' and nextnext in 'oa': + result.append('x') + elif next == 'h': + result.append('0') + i += 1 + elif next != 'c' or nextnext != 'h': + result.append('t') + elif c == 'v': + result.append('f') + elif c == 'w': + if i == 0 and next == 'h': + i += 1 + if nextnext in 'aeiou' or nextnext == '*****': + result.append('w') + elif c == 'x': + if i == 0: + if next == 'h' or (next == 'i' and nextnext in 'oa'): + result.append('x') + else: + result.append('s') + else: + result.append('k') + result.append('s') + elif c == 'y': + if next in 'aeiou': + result.append('y') + elif c == 'z': + result.append('s') + elif c == ' ': + if len(result) > 0 and result[-1] != ' ': + result.append(' ') + + i += 1 + + return ''.join(result).upper() + + +def porter_stem(s): + if isinstance(s, bytes): + raise TypeError(_no_bytes_err) + return Stemmer(s).stem() diff --git a/lib/jellyfish/compat.py b/lib/jellyfish/compat.py new file mode 100644 index 00000000..b5e09792 --- /dev/null +++ b/lib/jellyfish/compat.py @@ -0,0 +1,13 @@ +import sys +import itertools + +IS_PY3 = sys.version_info[0] == 3 + +if IS_PY3: + _range = range + _zip_longest = itertools.zip_longest + _no_bytes_err = 'expected str, got bytes' +else: + _range = xrange + _zip_longest = itertools.izip_longest + _no_bytes_err = 'expected unicode, got str' diff --git a/lib/jellyfish/porter.py b/lib/jellyfish/porter.py new file mode 100644 index 00000000..2945b22d --- /dev/null +++ b/lib/jellyfish/porter.py @@ -0,0 +1,218 @@ +from .compat import _range + +_s2_options = { + 'a': ((['a', 't', 'i', 'o', 'n', 'a', 'l'], ['a', 't', 
'e']), + (['t', 'i', 'o', 'n', 'a', 'l'], ['t', 'i', 'o', 'n'])), + 'c': ((['e', 'n', 'c', 'i'], ['e', 'n', 'c', 'e']), + (['a', 'n', 'c', 'i'], ['a', 'n', 'c', 'e']),), + 'e': ((['i', 'z', 'e', 'r'], ['i', 'z', 'e']),), + 'l': ((['b', 'l', 'i'], ['b', 'l', 'e']), + (['a', 'l', 'l', 'i'], ['a', 'l']), + (['e', 'n', 't', 'l', 'i'], ['e', 'n', 't']), + (['e', 'l', 'i'], ['e']), + (['o', 'u', 's', 'l', 'i'], ['o', 'u', 's']),), + 'o': ((['i', 'z', 'a', 't', 'i', 'o', 'n'], ['i', 'z', 'e']), + (['a', 't', 'i', 'o', 'n'], ['a', 't', 'e']), + (['a', 't', 'o', 'r'], ['a', 't', 'e']),), + 's': ((['a', 'l', 'i', 's', 'm'], ['a', 'l']), + (['i', 'v', 'e', 'n', 'e', 's', 's'], ['i', 'v', 'e']), + (['f', 'u', 'l', 'n', 'e', 's', 's'], ['f', 'u', 'l']), + (['o', 'u', 's', 'n', 'e', 's', 's'], ['o', 'u', 's']),), + 't': ((['a', 'l', 'i', 't', 'i'], ['a', 'l']), + (['i', 'v', 'i', 't', 'i'], ['i', 'v', 'e']), + (['b', 'i', 'l', 'i', 't', 'i'], ['b', 'l', 'e']),), + 'g': ((['l', 'o', 'g', 'i'], ['l', 'o', 'g']),), +} + + +_s3_options = { + 'e': ((['i', 'c', 'a', 't', 'e'], ['i', 'c']), + (['a', 't', 'i', 'v', 'e'], []), + (['a', 'l', 'i', 'z', 'e'], ['a', 'l']),), + 'i': ((['i', 'c', 'i', 't', 'i'], ['i', 'c']),), + 'l': ((['i', 'c', 'a', 'l'], ['i', 'c']), + (['f', 'u', 'l'], []),), + 's': ((['n', 'e', 's', 's'], []),), +} + +_s4_endings = { + 'a': (['a', 'l'],), + 'c': (['a', 'n', 'c', 'e'], ['e', 'n', 'c', 'e']), + 'e': (['e', 'r'],), + 'i': (['i', 'c'],), + 'l': (['a', 'b', 'l', 'e'], ['i', 'b', 'l', 'e']), + 'n': (['a', 'n', 't'], ['e', 'm', 'e', 'n', 't'], ['m', 'e', 'n', 't'], + ['e', 'n', 't']), + # handle 'o' separately + 's': (['i', 's', 'm'],), + 't': (['a', 't', 'e'], ['i', 't', 'i']), + 'u': (['o', 'u', 's'],), + 'v': (['i', 'v', 'e'],), + 'z': (['i', 'z', 'e'],), +} + + +class Stemmer(object): + def __init__(self, b): + self.b = list(b) + self.k = len(b)-1 + self.j = 0 + + def cons(self, i): + """ True iff b[i] is a consonant """ + if self.b[i] in 'aeiou': + return 
False + elif self.b[i] == 'y': + return True if i == 0 else not self.cons(i-1) + return True + + def m(self): + n = i = 0 + while True: + if i > self.j: + return n + if not self.cons(i): + break + i += 1 + i += 1 + while True: + while True: + if i > self.j: + return n + if self.cons(i): + break + i += 1 + + i += 1 + n += 1 + + while True: + if i > self.j: + return n + if not self.cons(i): + break + i += 1 + i += 1 + + def vowel_in_stem(self): + """ True iff 0...j contains vowel """ + for i in _range(0, self.j+1): + if not self.cons(i): + return True + return False + + def doublec(self, j): + """ True iff j, j-1 contains double consonant """ + if j < 1 or self.b[j] != self.b[j-1]: + return False + return self.cons(j) + + def cvc(self, i): + """ True iff i-2,i-1,i is consonent-vowel consonant + and if second c isn't w,x, or y. + used to restore e at end of short words like cave, love, hope, crime + """ + if (i < 2 or not self.cons(i) or self.cons(i-1) or not self.cons(i-2) or + self.b[i] in 'wxy'): + return False + return True + + def ends(self, s): + length = len(s) + """ True iff 0...k ends with string s """ + res = (self.b[self.k-length+1:self.k+1] == s) + if res: + self.j = self.k - length + return res + + def setto(self, s): + """ set j+1...k to string s, readjusting k """ + length = len(s) + self.b[self.j+1:self.j+1+length] = s + self.k = self.j + length + + def r(self, s): + if self.m() > 0: + self.setto(s) + + def step1ab(self): + if self.b[self.k] == 's': + if self.ends(['s', 's', 'e', 's']): + self.k -= 2 + elif self.ends(['i', 'e', 's']): + self.setto(['i']) + elif self.b[self.k-1] != 's': + self.k -= 1 + if self.ends(['e', 'e', 'd']): + if self.m() > 0: + self.k -= 1 + elif ((self.ends(['e', 'd']) or self.ends(['i', 'n', 'g'])) and + self.vowel_in_stem()): + self.k = self.j + if self.ends(['a', 't']): + self.setto(['a', 't', 'e']) + elif self.ends(['b', 'l']): + self.setto(['b', 'l', 'e']) + elif self.ends(['i', 'z']): + self.setto(['i', 'z', 'e']) + elif 
self.doublec(self.k): + self.k -= 1 + if self.b[self.k] in 'lsz': + self.k += 1 + elif self.m() == 1 and self.cvc(self.k): + self.setto(['e']) + + def step1c(self): + """ turn terminal y into i if there's a vowel in stem """ + if self.ends(['y']) and self.vowel_in_stem(): + self.b[self.k] = 'i' + + def step2and3(self): + for end, repl in _s2_options.get(self.b[self.k-1], []): + if self.ends(end): + self.r(repl) + break + + for end, repl in _s3_options.get(self.b[self.k], []): + if self.ends(end): + self.r(repl) + break + + def step4(self): + ch = self.b[self.k-1] + + if ch == 'o': + if not ((self.ends(['i', 'o', 'n']) and self.b[self.j] in 'st') or + self.ends(['o', 'u'])): + return + else: + endings = _s4_endings.get(ch, []) + for end in endings: + if self.ends(end): + break + else: + return + + if self.m() > 1: + self.k = self.j + + def step5(self): + self.j = self.k + if self.b[self.k] == 'e': + a = self.m() + if a > 1 or a == 1 and not self.cvc(self.k-1): + self.k -= 1 + if self.b[self.k] == 'l' and self.doublec(self.k) and self.m() > 1: + self.k -= 1 + + def result(self): + return ''.join(self.b[:self.k+1]) + + def stem(self): + if self.k > 1: + self.step1ab() + self.step1c() + self.step2and3() + self.step4() + self.step5() + return self.result() diff --git a/lib/jellyfish/test.py b/lib/jellyfish/test.py new file mode 100644 index 00000000..72ef9344 --- /dev/null +++ b/lib/jellyfish/test.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- +import sys +if sys.version_info[0] < 3: + import unicodecsv as csv + open_kwargs = {} +else: + import csv + open_kwargs = {'encoding': 'utf8'} +import platform +import pytest + + +def assertAlmostEqual(a, b, places=3): + assert abs(a - b) < (0.1**places) + + +if platform.python_implementation() == 'CPython': + implementations = ['python', 'c'] +else: + implementations = ['python'] + + +@pytest.fixture(params=implementations) +def jf(request): + if request.param == 'python': + from jellyfish import _jellyfish as jf + else: + from 
jellyfish import cjellyfish as jf + return jf + + +def _load_data(name): + with open('testdata/{}.csv'.format(name), **open_kwargs) as f: + for data in csv.reader(f): + yield data + + +@pytest.mark.parametrize("s1,s2,value", _load_data('jaro_winkler'), ids=str) +def test_jaro_winkler(jf, s1, s2, value): + value = float(value) + assertAlmostEqual(jf.jaro_winkler(s1, s2), value, places=3) + + +@pytest.mark.parametrize("s1,s2,value", _load_data('jaro_distance'), ids=str) +def test_jaro_distance(jf, s1, s2, value): + value = float(value) + assertAlmostEqual(jf.jaro_distance(s1, s2), value, places=3) + + +@pytest.mark.parametrize("s1,s2,value", _load_data('hamming'), ids=str) +def test_hamming_distance(jf, s1, s2, value): + value = int(value) + assert jf.hamming_distance(s1, s2) == value + + +@pytest.mark.parametrize("s1,s2,value", _load_data('levenshtein'), ids=str) +def test_levenshtein_distance(jf, s1, s2, value): + value = int(value) + assert jf.levenshtein_distance(s1, s2) == value + + +@pytest.mark.parametrize("s1,s2,value", _load_data('damerau_levenshtein'), ids=str) +def test_damerau_levenshtein_distance(jf, s1, s2, value): + value = int(value) + assert jf.damerau_levenshtein_distance(s1, s2) == value + + +@pytest.mark.parametrize("s1,code", _load_data('soundex'), ids=str) +def test_soundex(jf, s1, code): + assert jf.soundex(s1) == code + + +@pytest.mark.parametrize("s1,code", _load_data('metaphone'), ids=str) +def test_metaphone(jf, s1, code): + assert jf.metaphone(s1) == code + + +@pytest.mark.parametrize("s1,s2", _load_data('nysiis'), ids=str) +def test_nysiis(jf, s1, s2): + assert jf.nysiis(s1) == s2 + + +@pytest.mark.parametrize("s1,s2", _load_data('match_rating_codex'), ids=str) +def test_match_rating_codex(jf, s1, s2): + assert jf.match_rating_codex(s1) == s2 + + +@pytest.mark.parametrize("s1,s2,value", _load_data('match_rating_comparison'), ids=str) +def test_match_rating_comparison(jf, s1, s2, value): + value = {'True': True, 'False': False, 'None': 
None}[value] + assert jf.match_rating_comparison(s1, s2) is value + + +# use non-parameterized version for speed +# @pytest.mark.parametrize("a,b", _load_data('porter'), ids=str) +# def test_porter_stem(jf, a, b): +# assert jf.porter_stem(a) == b + +def test_porter_stem(jf): + with open('testdata/porter.csv', **open_kwargs) as f: + reader = csv.reader(f) + for (a, b) in reader: + assert jf.porter_stem(a) == b + + +if platform.python_implementation() == 'CPython': + def test_match_rating_comparison_segfault(): + import hashlib + from jellyfish import cjellyfish as jf + sha1s = [u'{}'.format(hashlib.sha1(str(v).encode('ascii')).hexdigest()) + for v in range(100)] + # this segfaulted on 0.1.2 + assert [[jf.match_rating_comparison(h1, h2) for h1 in sha1s] for h2 in sha1s] + + + def test_damerau_levenshtein_unicode_segfault(): + # unfortunate difference in behavior between Py & C versions + from jellyfish.cjellyfish import damerau_levenshtein_distance as c_dl + from jellyfish._jellyfish import damerau_levenshtein_distance as py_dl + s1 = u'mylifeoutdoors' + s2 = u'нахлыст' + with pytest.raises(ValueError): + c_dl(s1, s2) + with pytest.raises(ValueError): + c_dl(s2, s1) + + assert py_dl(s1, s2) == 14 + assert py_dl(s2, s1) == 14 + + +def test_jaro_winkler_long_tolerance(jf): + no_lt = jf.jaro_winkler(u'two long strings', u'two long stringz', long_tolerance=False) + with_lt = jf.jaro_winkler(u'two long strings', u'two long stringz', long_tolerance=True) + # make sure long_tolerance does something + assertAlmostEqual(no_lt, 0.975) + assertAlmostEqual(with_lt, 0.984) + + +def test_damerau_levenshtein_distance_type(jf): + jf.damerau_levenshtein_distance(u'abc', u'abc') + with pytest.raises(TypeError) as exc: + jf.damerau_levenshtein_distance(b'abc', b'abc') + assert 'expected' in str(exc.value) + + +def test_levenshtein_distance_type(jf): + assert jf.levenshtein_distance(u'abc', u'abc') == 0 + with pytest.raises(TypeError) as exc: + jf.levenshtein_distance(b'abc', b'abc') + 
assert 'expected' in str(exc.value) + + +def test_jaro_distance_type(jf): + assert jf.jaro_distance(u'abc', u'abc') == 1 + with pytest.raises(TypeError) as exc: + jf.jaro_distance(b'abc', b'abc') + assert 'expected' in str(exc.value) + + +def test_jaro_winkler_type(jf): + assert jf.jaro_winkler(u'abc', u'abc') == 1 + with pytest.raises(TypeError) as exc: + jf.jaro_winkler(b'abc', b'abc') + assert 'expected' in str(exc.value) + + +def test_mra_comparison_type(jf): + assert jf.match_rating_comparison(u'abc', u'abc') is True + with pytest.raises(TypeError) as exc: + jf.match_rating_comparison(b'abc', b'abc') + assert 'expected' in str(exc.value) + + +def test_hamming_type(jf): + assert jf.hamming_distance(u'abc', u'abc') == 0 + with pytest.raises(TypeError) as exc: + jf.hamming_distance(b'abc', b'abc') + assert 'expected' in str(exc.value) + + +def test_soundex_type(jf): + assert jf.soundex(u'ABC') == 'A120' + with pytest.raises(TypeError) as exc: + jf.soundex(b'ABC') + assert 'expected' in str(exc.value) + + +def test_metaphone_type(jf): + assert jf.metaphone(u'abc') == 'ABK' + with pytest.raises(TypeError) as exc: + jf.metaphone(b'abc') + assert 'expected' in str(exc.value) + + +def test_nysiis_type(jf): + assert jf.nysiis(u'abc') == 'ABC' + with pytest.raises(TypeError) as exc: + jf.nysiis(b'abc') + assert 'expected' in str(exc.value) + + +def test_mr_codex_type(jf): + assert jf.match_rating_codex(u'abc') == 'ABC' + with pytest.raises(TypeError) as exc: + jf.match_rating_codex(b'abc') + assert 'expected' in str(exc.value) + + +def test_porter_type(jf): + assert jf.porter_stem(u'abc') == 'abc' + with pytest.raises(TypeError) as exc: + jf.porter_stem(b'abc') + assert 'expected' in str(exc.value) diff --git a/lib/musicbrainzngs/__init__.py b/lib/musicbrainzngs/__init__.py old mode 100644 new mode 100755 diff --git a/lib/musicbrainzngs/caa.py b/lib/musicbrainzngs/caa.py old mode 100644 new mode 100755 index c43a5bea..235c560a --- a/lib/musicbrainzngs/caa.py +++ 
b/lib/musicbrainzngs/caa.py @@ -13,6 +13,7 @@ import json from musicbrainzngs import compat from musicbrainzngs import musicbrainz +from musicbrainzngs.util import _unicode hostname = "coverartarchive.org" @@ -78,7 +79,8 @@ def _caa_request(mbid, imageid=None, size=None, entitytype="release"): return resp else: # Otherwise it's json - return json.loads(resp) + data = _unicode(resp) + return json.loads(data) def get_image_list(releaseid): @@ -88,7 +90,7 @@ def get_image_list(releaseid): `_ returned by the Cover Art Archive API. - If an error occurs then a musicbrainz.ResponseError will + If an error occurs then a :class:`~musicbrainzngs.ResponseError` will be raised with one of the following HTTP codes: * 400: `Releaseid` is not a valid UUID @@ -105,7 +107,7 @@ def get_release_group_image_list(releasegroupid): `_ returned by the Cover Art Archive API. - If an error occurs then a musicbrainz.ResponseError will + If an error occurs then a :class:`~musicbrainzngs.ResponseError` will be raised with one of the following HTTP codes: * 400: `Releaseid` is not a valid UUID @@ -147,8 +149,8 @@ def get_image(mbid, coverid, size=None, entitytype="release"): If `size` is not specified, download the largest copy present, which can be very large. 
- If an error occurs then a musicbrainz.ResponseError will be raised with one - of the following HTTP codes: + If an error occurs then a :class:`~musicbrainzngs.ResponseError` + will be raised with one of the following HTTP codes: * 400: `Releaseid` is not a valid UUID or `coverid` is invalid * 404: No release exists with an MBID of `releaseid` diff --git a/lib/musicbrainzngs/compat.py b/lib/musicbrainzngs/compat.py old mode 100644 new mode 100755 index 36574b5c..47503992 --- a/lib/musicbrainzngs/compat.py +++ b/lib/musicbrainzngs/compat.py @@ -40,8 +40,7 @@ is_py3 = (_ver[0] == 3) if is_py2: from StringIO import StringIO from urllib2 import HTTPPasswordMgr, HTTPDigestAuthHandler, Request,\ - HTTPHandler, build_opener, HTTPError, URLError,\ - build_opener + HTTPHandler, build_opener, HTTPError, URLError from httplib import BadStatusLine, HTTPException from urlparse import urlunparse from urllib import urlencode diff --git a/lib/musicbrainzngs/mbxml.py b/lib/musicbrainzngs/mbxml.py old mode 100644 new mode 100755 index 49a4a02e..60236dc7 --- a/lib/musicbrainzngs/mbxml.py +++ b/lib/musicbrainzngs/mbxml.py @@ -84,8 +84,10 @@ def parse_elements(valid_els, inner_els, element): call parse_subelement() and return a dict {'subelement': } if parse_subelement returns a tuple of the form - ('subelement-key', ) then return a dict - {'subelement-key': } instead + (True, {'subelement-key': }) + then merge the second element of the tuple into the + result (which may have a key other than 'subelement' or + more than 1 key) """ result = {} for sub in element: @@ -96,8 +98,8 @@ def parse_elements(valid_els, inner_els, element): result[t] = sub.text or "" elif t in inner_els.keys(): inner_result = inner_els[t](sub) - if isinstance(inner_result, tuple): - result[inner_result[0]] = inner_result[1] + if isinstance(inner_result, tuple) and inner_result[0]: + result.update(inner_result[1]) else: result[t] = inner_result # add counts for lists when available @@ -135,8 +137,10 @@ def 
parse_message(message): result = {} valid_elements = {"area": parse_area, "artist": parse_artist, + "instrument": parse_instrument, "label": parse_label, "place": parse_place, + "event": parse_event, "release": parse_release, "release-group": parse_release_group, "series": parse_series, @@ -153,6 +157,8 @@ def parse_message(message): "artist-list": parse_artist_list, "label-list": parse_label_list, "place-list": parse_place_list, + "event-list": parse_event_list, + "instrument-list": parse_instrument_list, "release-list": parse_release_list, "release-group-list": parse_release_group_list, "series-list": parse_series_list, @@ -176,9 +182,14 @@ def parse_collection_list(cl): def parse_collection(collection): result = {} - attribs = ["id"] + attribs = ["id", "type", "entity-type"] elements = ["name", "editor"] - inner_els = {"release-list": parse_release_list} + inner_els = {"release-list": parse_release_list, + "artist-list": parse_artist_list, + "event-list": parse_event_list, + "place-list": parse_place_list, + "recording-list": parse_recording_list, + "work-list": parse_work_list} result.update(parse_attributes(attribs, collection)) result.update(parse_elements(elements, inner_els, collection)) @@ -275,6 +286,38 @@ def parse_place(place): return result +def parse_event_list(el): + return [parse_event(e) for e in el] + +def parse_event(event): + result = {} + attribs = ["id", "type", "ext:score"] + elements = ["name", "time", "setlist", "cancelled", "disambiguation", "user-rating"] + inner_els = {"life-span": parse_lifespan, + "relation-list": parse_relation_list, + "alias-list": parse_alias_list, + "tag-list": parse_tag_list, + "user-tag-list": parse_tag_list, + "rating": parse_rating} + + result.update(parse_attributes(attribs, event)) + result.update(parse_elements(elements, inner_els, event)) + + return result + +def parse_instrument(instrument): + result = {} + attribs = ["id", "type", "ext:score"] + elements = ["name", "description", "disambiguation"] + 
inner_els = {"relation-list": parse_relation_list, + "tag-list": parse_tag_list, + "alias-list": parse_alias_list, + "annotation": parse_annotation} + result.update(parse_attributes(attribs, instrument)) + result.update(parse_elements(elements, inner_els, instrument)) + + return result + def parse_label_list(ll): return [parse_label(l) for l in ll] @@ -302,15 +345,15 @@ def parse_label(label): def parse_relation_target(tgt): attributes = parse_attributes(['id'], tgt) if 'id' in attributes: - return ('target-id', attributes['id']) + return (True, {'target-id': attributes['id']}) else: - return ('target-id', tgt.text) + return (True, {'target-id': tgt.text}) def parse_relation_list(rl): attribs = ["target-type"] ttype = parse_attributes(attribs, rl) key = "%s-relation-list" % ttype["target-type"] - return (key, [parse_relation(r) for r in rl]) + return (True, {key: [parse_relation(r) for r in rl]}) def parse_relation(relation): result = {} @@ -318,8 +361,10 @@ def parse_relation(relation): elements = ["target", "direction", "begin", "end", "ended", "ordering-key"] inner_els = {"area": parse_area, "artist": parse_artist, + "instrument": parse_instrument, "label": parse_label, "place": parse_place, + "event": parse_event, "recording": parse_recording, "release": parse_release, "release-group": parse_release_group, @@ -330,9 +375,34 @@ def parse_relation(relation): } result.update(parse_attributes(attribs, relation)) result.update(parse_elements(elements, inner_els, relation)) + # We parse attribute-list again to get attributes that have both + # text and attribute values + result.update(parse_elements([], {"attribute-list": parse_relation_attribute_list}, relation)) return result +def parse_relation_attribute_list(attributelist): + ret = [] + for attribute in attributelist: + ret.append(parse_relation_attribute_element(attribute)) + return (True, {"attributes": ret}) + +def parse_relation_attribute_element(element): + # Parses an attribute into a dictionary containing 
an element + # {"attribute": } and also an additional element + # containing any xml attributes. + # e.g number + # -> {"attribute": "number", "value": "BuxWV 1"} + result = {} + for attr in element.attrib: + if "{" in attr: + a = fixtag(attr, NS_MAP)[0] + else: + a = attr + result[a] = element.attrib[attr] + result["attribute"] = element.text + return result + def parse_release(release): result = {} attribs = ["id", "ext:score"] @@ -359,7 +429,22 @@ def parse_release(release): return result def parse_medium_list(ml): - return [parse_medium(m) for m in ml] + """medium-list results from search have an additional + element containing the number of tracks + over all mediums. Optionally add this""" + medium_list = [] + track_count = None + for m in ml: + tag = fixtag(m.tag, NS_MAP)[0] + if tag == "ws2:medium": + medium_list.append(parse_medium(m)) + elif tag == "ws2:track-count": + track_count = int(m.text) + ret = {"medium-list": medium_list} + if track_count is not None: + ret["medium-track-count"] = track_count + + return (True, ret) def parse_release_event_list(rel): return [parse_release_event(re) for re in rel] @@ -376,7 +461,9 @@ def parse_medium(medium): result = {} elements = ["position", "format", "title"] inner_els = {"disc-list": parse_disc_list, - "track-list": parse_track_list} + "pregap": parse_track, + "track-list": parse_track_list, + "data-track-list": parse_track_list} result.update(parse_elements(elements, inner_els, medium)) return result @@ -477,11 +564,12 @@ def parse_work_attribute_list(wal): return [parse_work_attribute(wa) for wa in wal] def parse_work_attribute(wa): - result = {} attribs = ["type"] - - result.update(parse_attributes(attribs, wa)) - result["attribute"] = wa.text + typeinfo = parse_attributes(attribs, wa) + result = {} + if typeinfo: + result = {"attribute": typeinfo["type"], + "value": wa.text} return result @@ -504,7 +592,9 @@ def parse_disc(disc): result = {} attribs = ["id"] elements = ["sectors"] - inner_els = 
{"release-list": parse_release_list} + inner_els = {"release-list": parse_release_list, + "offset-list": parse_offset_list + } result.update(parse_attributes(attribs, disc)) result.update(parse_elements(elements, inner_els, disc)) @@ -522,6 +612,15 @@ def parse_cdstub(cdstub): return result +def parse_offset_list(ol): + return [int(o.text) for o in ol] + +def parse_instrument_list(rl): + result = [] + for r in rl: + result.append(parse_instrument(r)) + return result + def parse_release_list(rl): result = [] for r in rl: diff --git a/lib/musicbrainzngs/musicbrainz.py b/lib/musicbrainzngs/musicbrainz.py old mode 100644 new mode 100755 index c765dcf5..e56e67ce --- a/lib/musicbrainzngs/musicbrainz.py +++ b/lib/musicbrainzngs/musicbrainz.py @@ -20,16 +20,17 @@ from musicbrainzngs import mbxml from musicbrainzngs import util from musicbrainzngs import compat +# headphones import base64 -_version = "0.6devMODIFIED" +_version = "0.7devheadphones" _log = logging.getLogger("musicbrainzngs") LUCENE_SPECIAL = r'([+\-&|!(){}\[\]\^"~*?:\\\/])' # Constants for validation. 
-RELATABLE_TYPES = ['area', 'artist', 'label', 'place', 'recording', 'release', 'release-group', 'series', 'url', 'work'] +RELATABLE_TYPES = ['area', 'artist', 'label', 'place', 'event', 'recording', 'release', 'release-group', 'series', 'url', 'work', 'instrument'] RELATION_INCLUDES = [entity + '-rels' for entity in RELATABLE_TYPES] TAG_INCLUDES = ["tags", "user-tags"] RATING_INCLUDES = ["ratings", "user-ratings"] @@ -44,19 +45,19 @@ VALID_INCLUDES = { 'annotation': [ ], - 'instrument': [ - - ], + 'instrument': ["aliases", "annotation" + ] + RELATION_INCLUDES + TAG_INCLUDES, 'label': [ "releases", # Subqueries "discids", "media", "aliases", "annotation" ] + RELATION_INCLUDES + TAG_INCLUDES + RATING_INCLUDES, 'place' : ["aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, + 'event' : ["aliases"] + RELATION_INCLUDES + TAG_INCLUDES + RATING_INCLUDES, 'recording': [ "artists", "releases", # Subqueries "discids", "media", "artist-credits", "isrcs", - "annotation", "aliases" + "work-level-rels", "annotation", "aliases" ] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, 'release': [ "artists", "labels", "recordings", "release-groups", "media", @@ -85,13 +86,16 @@ VALID_INCLUDES = { 'collection': ['releases'], } VALID_BROWSE_INCLUDES = { - 'releases': ["artist-credits", "labels", "recordings", "isrcs", + 'artist': ["aliases"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, + 'event': ["aliases"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, + 'label': ["aliases"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, + 'recording': ["artist-credits", "isrcs"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, + 'release': ["artist-credits", "labels", "recordings", "isrcs", "release-groups", "media", "discids"] + RELATION_INCLUDES, - 'recordings': ["artist-credits", "isrcs"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, - 'labels': ["aliases"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, - 'artists': ["aliases"] + TAG_INCLUDES 
+ RATING_INCLUDES + RELATION_INCLUDES, - 'urls': RELATION_INCLUDES, - 'release-groups': ["artist-credits"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES + 'place': ["aliases"] + TAG_INCLUDES + RELATION_INCLUDES, + 'release-group': ["artist-credits"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, + 'url': RELATION_INCLUDES, + 'work': ["aliases", "annotation"] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, } #: These can be used to filter whenever releases are includes or browsed @@ -264,21 +268,26 @@ def _check_filter_and_make_params(entity, includes, release_status=[], release_t params["type"] = "|".join(release_type) return params -def _docstring(entity, browse=False): +def _docstring_get(entity): + includes = list(VALID_INCLUDES.get(entity, [])) + return _docstring_impl("includes", includes) + +def _docstring_browse(entity): + includes = list(VALID_BROWSE_INCLUDES.get(entity, [])) + return _docstring_impl("includes", includes) + +def _docstring_search(entity): + search_fields = list(VALID_SEARCH_FIELDS.get(entity, [])) + return _docstring_impl("fields", search_fields) + +def _docstring_impl(name, values): def _decorator(func): - if browse: - includes = list(VALID_BROWSE_INCLUDES.get(entity, [])) - else: - includes = list(VALID_INCLUDES.get(entity, [])) # puids are allowed so nothing breaks, but not documented - if "puids" in includes: includes.remove("puids") - includes = ", ".join(includes) + if "puids" in values: values.remove("puids") + vstr = ", ".join(values) + args = {name: vstr} if func.__doc__: - search_fields = list(VALID_SEARCH_FIELDS.get(entity, [])) - # puid is allowed so nothing breaks, but not documented - if "puid" in search_fields: search_fields.remove("puid") - func.__doc__ = func.__doc__.format(includes=includes, - fields=", ".join(search_fields)) + func.__doc__ = func.__doc__.format(**args) return func return _decorator @@ -299,7 +308,8 @@ def auth(u, p): global user, password user = u password = p - + +# headphones def 
hpauth(u, p): """Set the username and password to be used in subsequent queries to the MusicBrainz XML API that require authentication. @@ -329,8 +339,9 @@ def set_useragent(app, version, contact=None): _log.debug("set user-agent to %s" % _useragent) def set_hostname(new_hostname): - """Set the base hostname for MusicBrainz webservice requests. - Defaults to 'musicbrainz.org'.""" + """Set the hostname for MusicBrainz webservice requests. + Defaults to 'musicbrainz.org'. + You can also include a port: 'localhost:8000'.""" global hostname hostname = new_hostname @@ -471,7 +482,7 @@ class _MusicbrainzHttpRequest(compat.Request): # Core (internal) functions for calling the MB API. -def _safe_read(opener, req, body=None, max_retries=3, retry_delay_delta=2.0): +def _safe_read(opener, req, body=None, max_retries=8, retry_delay_delta=2.0): """Open an HTTP request with a given URL opener and (optionally) a request body. Transient errors lead to retries. Permanent errors and repeated errors are translated into a small set of handleable @@ -673,7 +684,6 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO, # Make request. req = _MusicbrainzHttpRequest(method, url, data) req.add_header('User-Agent', _useragent) - # Add headphones credentials if mb_auth: @@ -806,7 +816,7 @@ def _do_mb_post(path, body): # Single entity by ID -@_docstring('area') +@_docstring_get("area") def get_area_by_id(id, includes=[], release_status=[], release_type=[]): """Get the area with the MusicBrainz `id` as a dict with an 'area' key. @@ -815,7 +825,7 @@ def get_area_by_id(id, includes=[], release_status=[], release_type=[]): release_status, release_type) return _do_mb_query("area", id, includes, params) -@_docstring('artist') +@_docstring_get("artist") def get_artist_by_id(id, includes=[], release_status=[], release_type=[]): """Get the artist with the MusicBrainz `id` as a dict with an 'artist' key. 
@@ -824,7 +834,7 @@ def get_artist_by_id(id, includes=[], release_status=[], release_type=[]): release_status, release_type) return _do_mb_query("artist", id, includes, params) -@_docstring('instrument') +@_docstring_get("instrument") def get_instrument_by_id(id, includes=[], release_status=[], release_type=[]): """Get the instrument with the MusicBrainz `id` as a dict with an 'artist' key. @@ -833,7 +843,7 @@ def get_instrument_by_id(id, includes=[], release_status=[], release_type=[]): release_status, release_type) return _do_mb_query("instrument", id, includes, params) -@_docstring('label') +@_docstring_get("label") def get_label_by_id(id, includes=[], release_status=[], release_type=[]): """Get the label with the MusicBrainz `id` as a dict with a 'label' key. @@ -842,7 +852,7 @@ def get_label_by_id(id, includes=[], release_status=[], release_type=[]): release_status, release_type) return _do_mb_query("label", id, includes, params) -@_docstring('place') +@_docstring_get("place") def get_place_by_id(id, includes=[], release_status=[], release_type=[]): """Get the place with the MusicBrainz `id` as a dict with an 'place' key. @@ -851,7 +861,19 @@ def get_place_by_id(id, includes=[], release_status=[], release_type=[]): release_status, release_type) return _do_mb_query("place", id, includes, params) -@_docstring('recording') +@_docstring_get("event") +def get_event_by_id(id, includes=[], release_status=[], release_type=[]): + """Get the event with the MusicBrainz `id` as a dict with an 'event' key. + + The event dict has the following keys: + `id`, `type`, `name`, `time`, `disambiguation` and `life-span`. 
+ + *Available includes*: {includes}""" + params = _check_filter_and_make_params("event", includes, + release_status, release_type) + return _do_mb_query("event", id, includes, params) + +@_docstring_get("recording") def get_recording_by_id(id, includes=[], release_status=[], release_type=[]): """Get the recording with the MusicBrainz `id` as a dict with a 'recording' key. @@ -861,7 +883,7 @@ def get_recording_by_id(id, includes=[], release_status=[], release_type=[]): release_status, release_type) return _do_mb_query("recording", id, includes, params) -@_docstring('release') +@_docstring_get("release") def get_release_by_id(id, includes=[], release_status=[], release_type=[]): """Get the release with the MusicBrainz `id` as a dict with a 'release' key. @@ -870,7 +892,7 @@ def get_release_by_id(id, includes=[], release_status=[], release_type=[]): release_status, release_type) return _do_mb_query("release", id, includes, params) -@_docstring('release-group') +@_docstring_get("release-group") def get_release_group_by_id(id, includes=[], release_status=[], release_type=[]): """Get the release group with the MusicBrainz `id` as a dict @@ -881,21 +903,21 @@ def get_release_group_by_id(id, includes=[], release_status, release_type) return _do_mb_query("release-group", id, includes, params) -@_docstring('series') +@_docstring_get("series") def get_series_by_id(id, includes=[]): """Get the series with the MusicBrainz `id` as a dict with a 'series' key. *Available includes*: {includes}""" return _do_mb_query("series", id, includes) -@_docstring('work') +@_docstring_get("work") def get_work_by_id(id, includes=[]): """Get the work with the MusicBrainz `id` as a dict with a 'work' key. *Available includes*: {includes}""" return _do_mb_query("work", id, includes) -@_docstring('url') +@_docstring_get("url") def get_url_by_id(id, includes=[]): """Get the url with the MusicBrainz `id` as a dict with a 'url' key. 
@@ -905,35 +927,56 @@ def get_url_by_id(id, includes=[]): # Searching -@_docstring('annotation') +@_docstring_search("annotation") def search_annotations(query='', limit=None, offset=None, strict=False, **fields): """Search for annotations and return a dict with an 'annotation-list' key. *Available search fields*: {fields}""" return _do_mb_search('annotation', query, fields, limit, offset, strict) -@_docstring('area') +@_docstring_search("area") def search_areas(query='', limit=None, offset=None, strict=False, **fields): """Search for areas and return a dict with an 'area-list' key. *Available search fields*: {fields}""" return _do_mb_search('area', query, fields, limit, offset, strict) -@_docstring('artist') +@_docstring_search("artist") def search_artists(query='', limit=None, offset=None, strict=False, **fields): """Search for artists and return a dict with an 'artist-list' key. *Available search fields*: {fields}""" return _do_mb_search('artist', query, fields, limit, offset, strict) -@_docstring('label') +@_docstring_search("event") +def search_events(query='', limit=None, offset=None, strict=False, **fields): + """Search for events and return a dict with an 'event-list' key. + + *Available search fields*: {fields}""" + return _do_mb_search('event', query, fields, limit, offset, strict) + +@_docstring_search("instrument") +def search_instruments(query='', limit=None, offset=None, strict=False, **fields): + """Search for instruments and return a dict with a 'instrument-list' key. + + *Available search fields*: {fields}""" + return _do_mb_search('instrument', query, fields, limit, offset, strict) + +@_docstring_search("label") def search_labels(query='', limit=None, offset=None, strict=False, **fields): """Search for labels and return a dict with a 'label-list' key. 
*Available search fields*: {fields}""" return _do_mb_search('label', query, fields, limit, offset, strict) -@_docstring('recording') +@_docstring_search("place") +def search_places(query='', limit=None, offset=None, strict=False, **fields): + """Search for places and return a dict with a 'place-list' key. + + *Available search fields*: {fields}""" + return _do_mb_search('place', query, fields, limit, offset, strict) + +@_docstring_search("recording") def search_recordings(query='', limit=None, offset=None, strict=False, **fields): """Search for recordings and return a dict with a 'recording-list' key. @@ -941,14 +984,14 @@ def search_recordings(query='', limit=None, offset=None, *Available search fields*: {fields}""" return _do_mb_search('recording', query, fields, limit, offset, strict) -@_docstring('release') +@_docstring_search("release") def search_releases(query='', limit=None, offset=None, strict=False, **fields): """Search for recordings and return a dict with a 'recording-list' key. *Available search fields*: {fields}""" return _do_mb_search('release', query, fields, limit, offset, strict) -@_docstring('release-group') +@_docstring_search("release-group") def search_release_groups(query='', limit=None, offset=None, strict=False, **fields): """Search for release groups and return a dict @@ -957,14 +1000,14 @@ def search_release_groups(query='', limit=None, offset=None, *Available search fields*: {fields}""" return _do_mb_search('release-group', query, fields, limit, offset, strict) -@_docstring('series') +@_docstring_search("series") def search_series(query='', limit=None, offset=None, strict=False, **fields): """Search for series and return a dict with a 'series-list' key. 
*Available search fields*: {fields}""" return _do_mb_search('series', query, fields, limit, offset, strict) -@_docstring('work') +@_docstring_search("work") def search_works(query='', limit=None, offset=None, strict=False, **fields): """Search for works and return a dict with a 'work-list' key. @@ -973,7 +1016,7 @@ def search_works(query='', limit=None, offset=None, strict=False, **fields): # Lists of entities -@_docstring('discid') +@_docstring_get("discid") def get_releases_by_discid(id, includes=[], toc=None, cdstubs=True, media_format=None): """Search for releases with a :musicbrainz:`Disc ID` or table of contents. @@ -994,8 +1037,8 @@ def get_releases_by_discid(id, includes=[], toc=None, cdstubs=True, media_format The result is a dict with either a 'disc' , a 'cdstub' key or a 'release-list' (fuzzy match with TOC). - A 'disc' has a 'release-list' and a 'cdstub' key has direct 'artist' - and 'title' keys. + A 'disc' has an 'offset-count', an 'offset-list' and a 'release-list'. + A 'cdstub' key has direct 'artist' and 'title' keys. *Available includes*: {includes}""" params = _check_filter_and_make_params("discid", includes, release_status=[], @@ -1008,7 +1051,7 @@ def get_releases_by_discid(id, includes=[], toc=None, cdstubs=True, media_format params["media-format"] = media_format return _do_mb_query("discid", id, includes, params) -@_docstring('recording') +@_docstring_get("recording") def get_recordings_by_echoprint(echoprint, includes=[], release_status=[], release_type=[]): """Search for recordings with an `echoprint `_. @@ -1019,7 +1062,7 @@ def get_recordings_by_echoprint(echoprint, includes=[], release_status=[], raise ResponseError(cause=compat.HTTPError( None, 404, "Not Found", None, None)) -@_docstring('recording') +@_docstring_get("recording") def get_recordings_by_puid(puid, includes=[], release_status=[], release_type=[]): """Search for recordings with a :musicbrainz:`PUID`. 
@@ -1030,7 +1073,7 @@ def get_recordings_by_puid(puid, includes=[], release_status=[], raise ResponseError(cause=compat.HTTPError( None, 404, "Not Found", None, None)) -@_docstring('recording') +@_docstring_get("recording") def get_recordings_by_isrc(isrc, includes=[], release_status=[], release_type=[]): """Search for recordings with an :musicbrainz:`ISRC`. @@ -1042,7 +1085,7 @@ def get_recordings_by_isrc(isrc, includes=[], release_status=[], release_status, release_type) return _do_mb_query("isrc", isrc, includes, params) -@_docstring('work') +@_docstring_get("work") def get_works_by_iswc(iswc, includes=[]): """Search for works with an :musicbrainz:`ISWC`. The result is a dict with a`work-list`. @@ -1051,7 +1094,9 @@ def get_works_by_iswc(iswc, includes=[]): return _do_mb_query("iswc", iswc, includes) -def _browse_impl(entity, includes, valid_includes, limit, offset, params, release_status=[], release_type=[]): +def _browse_impl(entity, includes, limit, offset, params, release_status=[], release_type=[]): + includes = includes if isinstance(includes, list) else [includes] + valid_includes = VALID_BROWSE_INCLUDES[entity] _check_includes_impl(includes, valid_includes) p = {} for k,v in params.items(): @@ -1068,45 +1113,59 @@ def _browse_impl(entity, includes, valid_includes, limit, offset, params, releas # Browse methods # Browse include are a subset of regular get includes, so we check them here # and the test in _do_mb_query will pass anyway. -@_docstring('artists', browse=True) +@_docstring_browse("artist") def browse_artists(recording=None, release=None, release_group=None, - includes=[], limit=None, offset=None): + work=None, includes=[], limit=None, offset=None): """Get all artists linked to a recording, a release or a release group. You need to give one MusicBrainz ID. *Available includes*: {includes}""" - # optional parameter work? 
- valid_includes = VALID_BROWSE_INCLUDES['artists'] params = {"recording": recording, "release": release, - "release-group": release_group} - return _browse_impl("artist", includes, valid_includes, - limit, offset, params) + "release-group": release_group, + "work": work} + return _browse_impl("artist", includes, limit, offset, params) -@_docstring('labels', browse=True) +@_docstring_browse("event") +def browse_events(area=None, artist=None, place=None, + includes=[], limit=None, offset=None): + """Get all events linked to a area, a artist or a place. + You need to give one MusicBrainz ID. + + *Available includes*: {includes}""" + params = {"area": area, + "artist": artist, + "place": place} + return _browse_impl("event", includes, limit, offset, params) + +@_docstring_browse("label") def browse_labels(release=None, includes=[], limit=None, offset=None): """Get all labels linked to a relase. You need to give a MusicBrainz ID. *Available includes*: {includes}""" - valid_includes = VALID_BROWSE_INCLUDES['labels'] params = {"release": release} - return _browse_impl("label", includes, valid_includes, - limit, offset, params) + return _browse_impl("label", includes, limit, offset, params) -@_docstring('recordings', browse=True) +@_docstring_browse("place") +def browse_places(area=None, includes=[], limit=None, offset=None): + """Get all places linked to an area. You need to give a MusicBrainz ID. + + *Available includes*: {includes}""" + params = {"area": area} + return _browse_impl("place", includes, limit, offset, params) + +@_docstring_browse("recording") def browse_recordings(artist=None, release=None, includes=[], limit=None, offset=None): """Get all recordings linked to an artist or a release. You need to give one MusicBrainz ID. 
*Available includes*: {includes}""" - valid_includes = VALID_BROWSE_INCLUDES['recordings'] params = {"artist": artist, "release": release} - return _browse_impl("recording", includes, valid_includes, - limit, offset, params) + return _browse_impl("recording", includes, limit, offset, params) -@_docstring('releases', browse=True) +@_docstring_browse("release") def browse_releases(artist=None, track_artist=None, label=None, recording=None, release_group=None, release_status=[], release_type=[], includes=[], limit=None, offset=None): @@ -1121,16 +1180,15 @@ def browse_releases(artist=None, track_artist=None, label=None, recording=None, *Available includes*: {includes}""" # track_artist param doesn't work yet - valid_includes = VALID_BROWSE_INCLUDES['releases'] params = {"artist": artist, "track_artist": track_artist, "label": label, "recording": recording, "release-group": release_group} - return _browse_impl("release", includes, valid_includes, limit, offset, + return _browse_impl("release", includes, limit, offset, params, release_status, release_type) -@_docstring('release-groups', browse=True) +@_docstring_browse("release-group") def browse_release_groups(artist=None, release=None, release_type=[], includes=[], limit=None, offset=None): """Get all release groups linked to an artist or a release. @@ -1139,25 +1197,27 @@ def browse_release_groups(artist=None, release=None, release_type=[], You can filter by :data:`musicbrainz.VALID_RELEASE_TYPES`. *Available includes*: {includes}""" - valid_includes = VALID_BROWSE_INCLUDES['release-groups'] params = {"artist": artist, "release": release} - return _browse_impl("release-group", includes, valid_includes, - limit, offset, params, [], release_type) + return _browse_impl("release-group", includes, limit, + offset, params, [], release_type) -@_docstring('urls', browse=True) +@_docstring_browse("url") def browse_urls(resource=None, includes=[], limit=None, offset=None): """Get urls by actual URL string. 
You need to give a URL string as 'resource' *Available includes*: {includes}""" - # optional parameter work? - valid_includes = VALID_BROWSE_INCLUDES['urls'] params = {"resource": resource} - return _browse_impl("url", includes, valid_includes, - limit, offset, params) + return _browse_impl("url", includes, limit, offset, params) -# browse_work is defined in the docs but has no browse criteria +@_docstring_browse("work") +def browse_works(artist=None, includes=[], limit=None, offset=None): + """Get all works linked to an artist + + *Available includes*: {includes}""" + params = {"artist": artist} + return _browse_impl("work", includes, limit, offset, params) # Collections def get_collections(): @@ -1166,16 +1226,59 @@ def get_collections(): # Missing the count in the reply return _do_mb_query("collection", '') +def _do_collection_query(collection, collection_type, limit, offset): + params = {} + if limit: params["limit"] = limit + if offset: params["offset"] = offset + return _do_mb_query("collection", "%s/%s" % (collection, collection_type), [], params) + +def get_artists_in_collection(collection, limit=None, offset=None): + """List the artists in a collection. + Returns a dict with a 'collection' key, which again has a 'artist-list'. + + See `Browsing`_ for how to use `limit` and `offset`. + """ + return _do_collection_query(collection, "artists", limit, offset) + def get_releases_in_collection(collection, limit=None, offset=None): """List the releases in a collection. Returns a dict with a 'collection' key, which again has a 'release-list'. See `Browsing`_ for how to use `limit` and `offset`. """ - params = {} - if limit: params["limit"] = limit - if offset: params["offset"] = offset - return _do_mb_query("collection", "%s/releases" % collection, [], params) + return _do_collection_query(collection, "releases", limit, offset) + +def get_events_in_collection(collection, limit=None, offset=None): + """List the events in a collection. 
+ Returns a dict with a 'collection' key, which again has a 'event-list'. + + See `Browsing`_ for how to use `limit` and `offset`. + """ + return _do_collection_query(collection, "events", limit, offset) + +def get_places_in_collection(collection, limit=None, offset=None): + """List the places in a collection. + Returns a dict with a 'collection' key, which again has a 'place-list'. + + See `Browsing`_ for how to use `limit` and `offset`. + """ + return _do_collection_query(collection, "places", limit, offset) + +def get_recordings_in_collection(collection, limit=None, offset=None): + """List the recordings in a collection. + Returns a dict with a 'collection' key, which again has a 'recording-list'. + + See `Browsing`_ for how to use `limit` and `offset`. + """ + return _do_collection_query(collection, "recordings", limit, offset) + +def get_works_in_collection(collection, limit=None, offset=None): + """List the works in a collection. + Returns a dict with a 'collection' key, which again has a 'work-list'. + + See `Browsing`_ for how to use `limit` and `offset`. + """ + return _do_collection_query(collection, "works", limit, offset) # Submission methods @@ -1219,11 +1322,17 @@ def submit_tags(**kwargs): Takes parameters named e.g. 'artist_tags', 'recording_tags', etc., and of the form: {entity_id1: [tag1, ...], ...} + If you only have one tag for an entity you can use a string instead + of a list. The user's tags for each entity will be set to that list, adding or removing tags as necessary. Submitting an empty list for an entity will remove all tags for that entity by the user. 
""" + for k, v in kwargs.items(): + for id, tags in v.items(): + kwargs[k][id] = tags if isinstance(tags, list) else [tags] + query = mbxml.make_tag_request(**kwargs) return _do_mb_post("tag", query) diff --git a/lib/musicbrainzngs/util.py b/lib/musicbrainzngs/util.py old mode 100644 new mode 100755 diff --git a/lib/mutagen/README.rst b/lib/mutagen/README.rst deleted file mode 100644 index 7ea1b21e..00000000 --- a/lib/mutagen/README.rst +++ /dev/null @@ -1,58 +0,0 @@ -Mutagen -======= - -Mutagen is a Python module to handle audio metadata. It supports ASF, FLAC, -M4A, Monkey's Audio, MP3, Musepack, Ogg Opus, Ogg FLAC, Ogg Speex, Ogg -Theora, Ogg Vorbis, True Audio, WavPack, OptimFROG, and AIFF audio files. -All versions of ID3v2 are supported, and all standard ID3v2.4 frames are -parsed. It can read Xing headers to accurately calculate the bitrate and -length of MP3s. ID3 and APEv2 tags can be edited regardless of audio -format. It can also manipulate Ogg streams on an individual packet/page -level. - -Mutagen works on Python 2.6, 2.7, 3.3, 3.4 (CPython and PyPy) and has no -dependencies outside the Python standard library. - - -Installing ----------- - - $ ./setup.py build - $ su -c "./setup.py install" - - -Documentation -------------- - -The primary documentation for Mutagen is the doc strings found in -the source code and the sphinx documentation in the docs/ directory. - -To build the docs (needs sphinx): - - $ ./setup.py build_sphinx - -The tools/ directory contains several useful examples. - -The docs are also hosted on readthedocs.org: - - http://mutagen.readthedocs.org - - -Testing the Module ------------------- - -To test Mutagen's MP3 reading support, run - $ tools/mutagen-pony -Mutagen will try to load all of them, and report any errors. - -To look at the tags in files, run - $ tools/mutagen-inspect filename ... 
- -To run our test suite, - $ ./setup.py test - - -Compatibility/Bugs ------------------- - -See docs/bugs.rst diff --git a/lib/mutagen/__init__.py b/lib/mutagen/__init__.py old mode 100644 new mode 100755 index 83b47e5f..4603fe6e --- a/lib/mutagen/__init__.py +++ b/lib/mutagen/__init__.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. - +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Mutagen aims to be an all purpose multimedia tagging library. @@ -14,7 +13,7 @@ import mutagen.[format] metadata = mutagen.[format].Open(filename) -`metadata` acts like a dictionary of tags in the file. Tags are generally a +``metadata`` acts like a dictionary of tags in the file. Tags are generally a list of string-like values, but may have additional methods available depending on tag or format. They may also be entirely different objects for certain keys, again depending on format. @@ -22,9 +21,9 @@ for certain keys, again depending on format. 
from mutagen._util import MutagenError from mutagen._file import FileType, StreamInfo, File -from mutagen._tags import Metadata +from mutagen._tags import Tags, Metadata, PaddingInfo -version = (1, 27) +version = (1, 38, -1) """Version tuple.""" version_string = ".".join(map(str, version)) @@ -38,4 +37,8 @@ StreamInfo File +Tags + Metadata + +PaddingInfo diff --git a/lib/mutagen/_compat.py b/lib/mutagen/_compat.py old mode 100644 new mode 100755 index fa01f72e..ebb80ea4 --- a/lib/mutagen/_compat.py +++ b/lib/mutagen/_compat.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import sys @@ -16,6 +16,7 @@ if PY2: from StringIO import StringIO BytesIO = StringIO from cStringIO import StringIO as cBytesIO + from itertools import izip long_ = long integer_types = (int, long) @@ -57,6 +58,7 @@ elif PY3: string_types = (str,) text_type = str + izip = zip xrange = range cmp = lambda a, b: (a > b) - (a < b) chr_ = lambda x: bytes([x]) diff --git a/lib/mutagen/_constants.py b/lib/mutagen/_constants.py old mode 100644 new mode 100755 index 62c1ce02..5c1c1a10 --- a/lib/mutagen/_constants.py +++ b/lib/mutagen/_constants.py @@ -1,4 +1,9 @@ # -*- coding: utf-8 -*- +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
"""Constants used by Mutagen.""" diff --git a/lib/mutagen/_file.py b/lib/mutagen/_file.py old mode 100644 new mode 100755 index fc1caeb9..1ccbf240 --- a/lib/mutagen/_file.py +++ b/lib/mutagen/_file.py @@ -1,21 +1,26 @@ +# -*- coding: utf-8 -*- # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import warnings -from mutagen._util import DictMixin +from mutagen._util import DictMixin, loadfile +from mutagen._compat import izip class FileType(DictMixin): - """An abstract object wrapping tags and audio stream information. + """FileType(filething, **kwargs) - Attributes: + Args: + filething (filething): A filename or a file-like object - * info -- stream information (length, bitrate, sample rate) - * tags -- metadata tags, if any + Subclasses might take further options via keyword arguments. + + An abstract object wrapping tags and audio stream information. Each file format has different potential tags and stream information. @@ -23,6 +28,10 @@ class FileType(DictMixin): FileTypes implement an interface very similar to Metadata; the dict interface, save, load, and delete calls on a FileType call the appropriate methods on its tag data. 
+ + Attributes: + info (`StreamInfo`): contains length, bitrate, sample rate + tags (`Tags`): metadata tags, if any, otherwise `None` """ __module__ = "mutagen" @@ -32,14 +41,15 @@ class FileType(DictMixin): filename = None _mimes = ["application/octet-stream"] - def __init__(self, filename=None, *args, **kwargs): - if filename is None: + def __init__(self, *args, **kwargs): + if not args and not kwargs: warnings.warn("FileType constructor requires a filename", DeprecationWarning) else: - self.load(filename, *args, **kwargs) + self.load(*args, **kwargs) - def load(self, filename, *args, **kwargs): + @loadfile() + def load(self, filething, *args, **kwargs): raise NotImplementedError def __getitem__(self, key): @@ -86,34 +96,47 @@ class FileType(DictMixin): else: return self.tags.keys() - def delete(self, filename=None): - """Remove tags from a file.""" + @loadfile(writable=True) + def delete(self, filething): + """delete(filething=None) + + Remove tags from a file. + + In cases where the tagging format is independent of the file type + (for example `mutagen.id3.ID3`) all traces of the tagging format will + be removed. + In cases where the tag is part of the file type, all tags and + padding will be removed. + + The tags attribute will be cleared as well if there is one. + + Does nothing if the file has no tags. + + Raises: + MutagenError: if deleting wasn't possible + """ if self.tags is not None: - if filename is None: - filename = self.filename - else: - warnings.warn( - "delete(filename=...) is deprecated, reload the file", - DeprecationWarning) - return self.tags.delete(filename) + return self.tags.delete(filething) - def save(self, filename=None, **kwargs): - """Save metadata tags.""" + @loadfile(writable=True) + def save(self, filething, **kwargs): + """save(filething=None, **kwargs) + + Save metadata tags. 
+ + Raises: + MutagenError: if saving wasn't possible + """ - if filename is None: - filename = self.filename - else: - warnings.warn( - "save(filename=...) is deprecated, reload the file", - DeprecationWarning) if self.tags is not None: - return self.tags.save(filename, **kwargs) - else: - raise ValueError("no tags in file") + return self.tags.save(filething, **kwargs) def pprint(self): - """Print stream information and comment key=value pairs.""" + """ + Returns: + text: stream information and comment key=value pairs. + """ stream = "%s (%s)" % (self.info.pprint(), self.mime[0]) try: @@ -126,14 +149,15 @@ class FileType(DictMixin): def add_tags(self): """Adds new tags to the file. - Raises if tags already exist. + Raises: + MutagenError: if tags already exist or adding is not possible. """ raise NotImplementedError @property def mime(self): - """A list of mime types""" + """A list of mime types (`text`)""" mimes = [] for Kind in type(self).__mro__: @@ -144,6 +168,20 @@ class FileType(DictMixin): @staticmethod def score(filename, fileobj, header): + """Returns a score for how likely the file can be parsed by this type. + + Args: + filename (path): a file path + fileobj (fileobj): a file object open in rb mode. Position is + undefined + header (bytes): data of undefined length, starts with the start of + the file. + + Returns: + int: negative if definitely not a matching type, otherwise a score, + the bigger the more certain that the file can be loaded. + """ + raise NotImplementedError @@ -158,13 +196,19 @@ class StreamInfo(object): __module__ = "mutagen" def pprint(self): - """Print stream information""" + """ + Returns: + text: Print stream information + """ raise NotImplementedError -def File(filename, options=None, easy=False): - """Guess the type of the file and try to open it. +@loadfile(method=False) +def File(filething, options=None, easy=False): + """File(filething, options=None, easy=False) + + Guess the type of the file and try to open it. 
The file type is decided by several things, such as the first 128 bytes (which usually contains a file type identifier), the @@ -172,12 +216,20 @@ def File(filename, options=None, easy=False): If no appropriate type could be found, None is returned. - :param options: Sequence of :class:`FileType` implementations, defaults to - all included ones. + Args: + filething (filething) + options: Sequence of :class:`FileType` implementations, + defaults to all included ones. + easy (bool): If the easy wrappers should be returned if available. + For example :class:`EasyMP3 ` instead of + :class:`MP3 `. - :param easy: If the easy wrappers should be returnd if available. - For example :class:`EasyMP3 ` instead - of :class:`MP3 `. + Returns: + FileType: A FileType instance for the detected type or `None` in case + the type couldn't be determined. + + Raises: + MutagenError: in case the detected type fails to load the file. """ if options is None: @@ -211,27 +263,37 @@ def File(filename, options=None, easy=False): from mutagen.optimfrog import OptimFROG from mutagen.aiff import AIFF from mutagen.aac import AAC + from mutagen.smf import SMF + from mutagen.dsf import DSF options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC, FLAC, AIFF, APEv2File, MP4, ID3FileType, WavPack, - Musepack, MonkeysAudio, OptimFROG, ASF, OggOpus, AAC] + Musepack, MonkeysAudio, OptimFROG, ASF, OggOpus, AAC, + SMF, DSF] if not options: return None - fileobj = open(filename, "rb") + fileobj = filething.fileobj + try: header = fileobj.read(128) - # Sort by name after score. Otherwise import order affects - # Kind sort order, which affects treatment of things with - # equals scores. - results = [(Kind.score(filename, fileobj, header), Kind.__name__) - for Kind in options] - finally: - fileobj.close() - results = list(zip(results, options)) + except IOError: + header = b"" + + # Sort by name after score.
Otherwise import order affects + # Kind sort order, which affects treatment of things with + # equals scores. + results = [(Kind.score(filething.name, fileobj, header), Kind.__name__) + for Kind in options] + + results = list(izip(results, options)) results.sort() (score, name), Kind = results[-1] if score > 0: - return Kind(filename) + try: + fileobj.seek(0, 0) + except IOError: + pass + return Kind(fileobj, filename=filething.filename) else: return None diff --git a/lib/mutagen/_senf/README.rst b/lib/mutagen/_senf/README.rst new file mode 100755 index 00000000..49ff9281 --- /dev/null +++ b/lib/mutagen/_senf/README.rst @@ -0,0 +1 @@ +Don't change things here, this is a copy of https://github.com/lazka/senf diff --git a/lib/mutagen/_senf/__init__.py b/lib/mutagen/_senf/__init__.py new file mode 100755 index 00000000..38b0aff6 --- /dev/null +++ b/lib/mutagen/_senf/__init__.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +import os + +if os.name != "nt": + # make imports work + _winapi = object() + +from ._fsnative import fsnative, path2fsn, fsn2text, fsn2bytes, \ + bytes2fsn, uri2fsn, fsn2uri, text2fsn +from ._print import print_, input_ +from ._stdlib import sep, pathsep, curdir, pardir, altsep, extsep, devnull, \ + defpath, getcwd, expanduser, expandvars +from ._argv import argv +from ._environ import environ, getenv, unsetenv, putenv +from ._temp import mkstemp, gettempdir, gettempprefix, mkdtemp + + +fsnative, print_, getcwd, getenv, unsetenv, putenv, environ, expandvars, \ + path2fsn, fsn2text, fsn2bytes, bytes2fsn, uri2fsn, fsn2uri, mkstemp, \ + gettempdir, gettempprefix, mkdtemp, input_, expanduser, text2fsn + + +version = (1, 2, 2) +"""Tuple[`int`, `int`, `int`]: The version tuple (major, minor, micro)""" + + +version_string = ".".join(map(str, version)) +"""`str`: A version string""" + + +argv = argv +"""List[`fsnative`]: Like `sys.argv` but contains unicode under +Windows + Python 2 +""" + + +sep = sep +"""`fsnative`: Like `os.sep` but a `fsnative`""" + + +pathsep = pathsep +"""`fsnative`: Like `os.pathsep` but a `fsnative`""" + + +curdir = curdir +"""`fsnative`: Like `os.curdir` but a `fsnative`""" + + +pardir = pardir +"""`fsnative`: Like `os.pardir` but a fsnative""" + + +altsep = altsep +"""`fsnative` or `None`: Like `os.altsep` but a `fsnative` or `None`""" + + +extsep = extsep +"""`fsnative`: Like `os.extsep` but a `fsnative`""" + + +devnull = devnull +"""`fsnative`: Like `os.devnull` but a `fsnative`""" + + +defpath = defpath +"""`fsnative`: Like `os.defpath` but a `fsnative`""" + + +__all__ = [] diff --git a/lib/mutagen/_senf/_argv.py b/lib/mutagen/_senf/_argv.py new file mode 100755 index 00000000..56b1d416 --- /dev/null +++ b/lib/mutagen/_senf/_argv.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation 
files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +import sys +import ctypes +import collections +from functools import total_ordering + +from ._compat import PY2, string_types +from ._fsnative import is_win, _fsn2legacy, path2fsn +from . import _winapi as winapi + + +def _get_win_argv(): + """Returns a unicode argv under Windows and standard sys.argv otherwise + + Returns: + List[`fsnative`] + """ + + assert is_win + + argc = ctypes.c_int() + try: + argv = winapi.CommandLineToArgvW( + winapi.GetCommandLineW(), ctypes.byref(argc)) + except WindowsError: + return [] + + if not argv: + return [] + + res = argv[max(0, argc.value - len(sys.argv)):argc.value] + + winapi.LocalFree(argv) + + return res + + +@total_ordering +class Argv(collections.MutableSequence): + """List[`fsnative`]: Like `sys.argv` but contains unicode + keys and values under Windows + Python 2. + + Any changes made will be forwarded to `sys.argv`. 
+ """ + + def __init__(self): + if PY2 and is_win: + self._argv = _get_win_argv() + else: + self._argv = sys.argv + + def __getitem__(self, index): + return self._argv[index] + + def __setitem__(self, index, value): + if isinstance(value, string_types): + value = path2fsn(value) + + self._argv[index] = value + + if sys.argv is not self._argv: + try: + if isinstance(value, string_types): + sys.argv[index] = _fsn2legacy(value) + else: + sys.argv[index] = [_fsn2legacy(path2fsn(v)) for v in value] + except IndexError: + pass + + def __delitem__(self, index): + del self._argv[index] + try: + del sys.argv[index] + except IndexError: + pass + + def __eq__(self, other): + return self._argv == other + + def __lt__(self, other): + return self._argv < other + + def __len__(self): + return len(self._argv) + + def __repr__(self): + return repr(self._argv) + + def insert(self, index, value): + value = path2fsn(value) + self._argv.insert(index, value) + if sys.argv is not self._argv: + sys.argv.insert(index, _fsn2legacy(value)) + + +argv = Argv() diff --git a/lib/mutagen/_senf/_compat.py b/lib/mutagen/_senf/_compat.py new file mode 100755 index 00000000..a31c4b87 --- /dev/null +++ b/lib/mutagen/_senf/_compat.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +import sys + + +PY2 = sys.version_info[0] == 2 +PY3 = not PY2 + + +if PY2: + from urlparse import urlparse, urlunparse + urlparse, urlunparse + from urllib import pathname2url, url2pathname, quote, unquote + pathname2url, url2pathname, quote, unquote + + from StringIO import StringIO + BytesIO = StringIO + from io import StringIO as TextIO + TextIO + + string_types = (str, unicode) + text_type = unicode + + iteritems = lambda d: d.iteritems() +elif PY3: + from urllib.parse import urlparse, quote, unquote, urlunparse + urlparse, quote, unquote, urlunparse + from urllib.request import pathname2url, url2pathname + pathname2url, url2pathname + + from io import StringIO + StringIO = StringIO + TextIO = StringIO + from io import BytesIO + BytesIO = BytesIO + + string_types = (str,) + text_type = str + + iteritems = lambda d: iter(d.items()) diff --git a/lib/mutagen/_senf/_environ.py b/lib/mutagen/_senf/_environ.py new file mode 100755 index 00000000..4f8a0640 --- /dev/null +++ b/lib/mutagen/_senf/_environ.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +import os +import ctypes +import collections + +from ._compat import text_type, PY2 +from ._fsnative import path2fsn, is_win, _fsn2legacy, fsnative +from . import _winapi as winapi + + +def get_windows_env_var(key): + """Get an env var. 
+ + Raises: + WindowsError + """ + + if not isinstance(key, text_type): + raise TypeError("%r not of type %r" % (key, text_type)) + + buf = ctypes.create_unicode_buffer(32767) + + stored = winapi.GetEnvironmentVariableW(key, buf, 32767) + if stored == 0: + raise ctypes.WinError() + return buf[:stored] + + +def set_windows_env_var(key, value): + """Set an env var. + + Raises: + WindowsError + """ + + if not isinstance(key, text_type): + raise TypeError("%r not of type %r" % (key, text_type)) + + if not isinstance(value, text_type): + raise TypeError("%r not of type %r" % (value, text_type)) + + status = winapi.SetEnvironmentVariableW(key, value) + if status == 0: + raise ctypes.WinError() + + +def del_windows_env_var(key): + """Delete an env var. + + Raises: + WindowsError + """ + + if not isinstance(key, text_type): + raise TypeError("%r not of type %r" % (key, text_type)) + + status = winapi.SetEnvironmentVariableW(key, None) + if status == 0: + raise ctypes.WinError() + + +def read_windows_environ(): + """Returns a unicode dict of the Windows environment. + + Raises: + WindowsError + """ + + res = winapi.GetEnvironmentStringsW() + if not res: + raise ctypes.WinError() + + res = ctypes.cast(res, ctypes.POINTER(ctypes.c_wchar)) + + done = [] + current = u"" + i = 0 + while 1: + c = res[i] + i += 1 + if c == u"\x00": + if not current: + break + done.append(current) + current = u"" + continue + current += c + + dict_ = {} + for entry in done: + try: + key, value = entry.split(u"=", 1) + except ValueError: + continue + key = _norm_key(key) + dict_[key] = value + + status = winapi.FreeEnvironmentStringsW(res) + if status == 0: + raise ctypes.WinError() + + return dict_ + + +def _norm_key(key): + assert isinstance(key, fsnative) + if is_win: + key = key.upper() + return key + + +class Environ(collections.MutableMapping): + """Dict[`fsnative`, `fsnative`]: Like `os.environ` but contains unicode + keys and values under Windows + Python 2.
+ + Any changes made will be forwarded to `os.environ`. + """ + + def __init__(self): + if is_win and PY2: + try: + env = read_windows_environ() + except WindowsError: + env = {} + else: + env = os.environ + self._env = env + + def __getitem__(self, key): + key = _norm_key(path2fsn(key)) + return self._env[key] + + def __setitem__(self, key, value): + key = _norm_key(path2fsn(key)) + value = path2fsn(value) + + if is_win and PY2: + # this calls putenv, so do it first and replace later + try: + os.environ[_fsn2legacy(key)] = _fsn2legacy(value) + except OSError: + raise ValueError + + try: + set_windows_env_var(key, value) + except WindowsError: + # py3+win fails for invalid keys. try to do the same + raise ValueError + try: + self._env[key] = value + except OSError: + raise ValueError + + def __delitem__(self, key): + key = _norm_key(path2fsn(key)) + + if is_win and PY2: + try: + del_windows_env_var(key) + except WindowsError: + pass + + try: + del os.environ[_fsn2legacy(key)] + except KeyError: + pass + + del self._env[key] + + def __iter__(self): + return iter(self._env) + + def __len__(self): + return len(self._env) + + def __repr__(self): + return repr(self._env) + + def copy(self): + return self._env.copy() + + +environ = Environ() + + +def getenv(key, value=None): + """Like `os.getenv` but returns unicode under Windows + Python 2 + + Args: + key (pathlike): The env var to get + value (object): The value to return if the env var does not exist + Returns: + `fsnative` or `object`: + The env var or the passed value if it doesn't exist + """ + + key = path2fsn(key) + if is_win and PY2: + return environ.get(key, value) + return os.getenv(key, value) + + +def unsetenv(key): + """Like `os.unsetenv` but takes unicode under Windows + Python 2 + + Args: + key (pathlike): The env var to unset + """ + + key = path2fsn(key) + if is_win: + # python 3 has no unsetenv under Windows -> use our ctypes one as well + try: + del_windows_env_var(key) + except WindowsError: + pass + 
else: + os.unsetenv(key) + + +def putenv(key, value): + """Like `os.putenv` but takes unicode under Windows + Python 2 + + Args: + key (pathlike): The env var to set + value (pathlike): The value to set + Raises: + ValueError + """ + + key = path2fsn(key) + value = path2fsn(value) + + if is_win and PY2: + try: + set_windows_env_var(key, value) + except WindowsError: + # py3 + win fails here + raise ValueError + else: + try: + os.putenv(key, value) + except OSError: + # win + py3 raise here for invalid keys which is probably a bug. + # ValueError seems better + raise ValueError diff --git a/lib/mutagen/_senf/_fsnative.py b/lib/mutagen/_senf/_fsnative.py new file mode 100755 index 00000000..c210995e --- /dev/null +++ b/lib/mutagen/_senf/_fsnative.py @@ -0,0 +1,610 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +import os +import sys +import ctypes +import codecs + +from .
import _winapi as winapi +from ._compat import text_type, PY3, PY2, url2pathname, urlparse, quote, \ + unquote, urlunparse + + +is_win = os.name == "nt" +is_unix = not is_win +is_darwin = sys.platform == "darwin" + +_surrogatepass = "strict" if PY2 else "surrogatepass" + + +def _normalize_codec(codec, _cache={}): + """Raises LookupError""" + + try: + return _cache[codec] + except KeyError: + _cache[codec] = codecs.lookup(codec).name + return _cache[codec] + + +def _swap_bytes(data): + """swaps bytes for 16 bit, leaves remaining trailing bytes alone""" + + a, b = data[1::2], data[::2] + data = bytearray().join(bytearray(x) for x in zip(a, b)) + if len(b) > len(a): + data += b[-1:] + return bytes(data) + + +def _codec_fails_on_encode_surrogates(codec, _cache={}): + """Returns if a codec fails correctly when passing in surrogates with + a surrogatepass/surrogateescape error handler. Some codecs were broken + in Python <3.4 + """ + + try: + return _cache[codec] + except KeyError: + try: + u"\uD800\uDC01".encode(codec) + except UnicodeEncodeError: + _cache[codec] = True + else: + _cache[codec] = False + return _cache[codec] + + +def _codec_can_decode_with_surrogatepass(codec, _cache={}): + """Returns if a codec supports the surrogatepass error handler when + decoding. 
Some codecs were broken in Python <3.4 + """ + + try: + return _cache[codec] + except KeyError: + try: + u"\ud83d".encode( + codec, _surrogatepass).decode(codec, _surrogatepass) + except UnicodeDecodeError: + _cache[codec] = False + else: + _cache[codec] = True + return _cache[codec] + + +def _bytes2winpath(data, codec): + """Like data.decode(codec, 'surrogatepass') but makes utf-16-le/be work + on Python < 3.4 + Windows + + https://bugs.python.org/issue27971 + + Raises UnicodeDecodeError, LookupError + """ + + try: + return data.decode(codec, _surrogatepass) + except UnicodeDecodeError: + if not _codec_can_decode_with_surrogatepass(codec): + if _normalize_codec(codec) == "utf-16-be": + data = _swap_bytes(data) + codec = "utf-16-le" + if _normalize_codec(codec) == "utf-16-le": + buffer_ = ctypes.create_string_buffer(data + b"\x00\x00") + value = ctypes.wstring_at(buffer_, len(data) // 2) + if value.encode("utf-16-le", _surrogatepass) != data: + raise + return value + else: + raise + else: + raise + + +def _winpath2bytes_py3(text, codec): + """Fallback implementation for text including surrogates""" + + # merge surrogate codepoints + if _normalize_codec(codec).startswith("utf-16"): + # fast path, utf-16 merges anyway + return text.encode(codec, _surrogatepass) + return _bytes2winpath( + text.encode("utf-16-le", _surrogatepass), + "utf-16-le").encode(codec, _surrogatepass) + + +if PY2: + def _winpath2bytes(text, codec): + return text.encode(codec) +else: + def _winpath2bytes(text, codec): + if _codec_fails_on_encode_surrogates(codec): + try: + return text.encode(codec) + except UnicodeEncodeError: + return _winpath2bytes_py3(text, codec) + else: + return _winpath2bytes_py3(text, codec) + + +def _fsn2legacy(path): + """Takes a fsnative path and returns a path that can be put into os.environ + or sys.argv. Might result in a mangled path on Python2 + Windows. + Can't fail. 
+ + Args: + path (fsnative) + Returns: + str + """ + + if PY2 and is_win: + return path.encode(_encoding, "replace") + return path + + +def _fsnative(text): + if not isinstance(text, text_type): + raise TypeError("%r needs to be a text type (%r)" % (text, text_type)) + + if is_unix: + # First we go to bytes so we can be sure we have a valid source. + # Theoretically we should fail here in case we have a non-unicode + # encoding. But this would make everything complicated and there is + # no good way to handle a failure from the user side. Instead + # fall back to utf-8 which is most likely the right choice in + # a mis-configured environment + encoding = _encoding + try: + path = text.encode(encoding, _surrogatepass) + except UnicodeEncodeError: + path = text.encode("utf-8", _surrogatepass) + + if b"\x00" in path: + path = path.replace(b"\x00", fsn2bytes(_fsnative(u"\uFFFD"), None)) + + if PY3: + return path.decode(_encoding, "surrogateescape") + return path + else: + if u"\x00" in text: + text = text.replace(u"\x00", u"\uFFFD") + return text + + +def _create_fsnative(type_): + # a bit of magic to make fsnative(u"foo") and isinstance(path, fsnative) + # work + + class meta(type): + + def __instancecheck__(self, instance): + return _typecheck_fsnative(instance) + + def __subclasscheck__(self, subclass): + return issubclass(subclass, type_) + + class impl(object): + """fsnative(text=u"") + + Args: + text (text): The text to convert to a path + Returns: + fsnative: The new path. + Raises: + TypeError: In case something other than `text` has been passed + + This type is a virtual base class for the real path type.
+ Instantiating it returns an instance of the real path type and it + overrides instance and subclass checks so that `isinstance` and + `issubclass` checks work: + + :: + + isinstance(fsnative(u"foo"), fsnative) == True + issubclass(type(fsnative(u"foo")), fsnative) == True + + The real returned type is: + + - **Python 2 + Windows:** :obj:`python:unicode`, with ``surrogates``, + without ``null`` + - **Python 2 + Unix:** :obj:`python:str`, without ``null`` + - **Python 3 + Windows:** :obj:`python3:str`, with ``surrogates``, + without ``null`` + - **Python 3 + Unix:** :obj:`python3:str`, with ``surrogates``, without + ``null``, without code points not encodable with the locale encoding + + Constructing a `fsnative` can't fail. + + Passing a `fsnative` to :func:`open` will never lead to `ValueError` + or `TypeError`. + + Any operation on `fsnative` can also use the `str` type, as long as + the `str` only contains ASCII and no NULL. + """ + + def __new__(cls, text=u""): + return _fsnative(text) + + new_type = meta("fsnative", (object,), dict(impl.__dict__)) + new_type.__module__ = "senf" + return new_type + + +fsnative_type = text_type if is_win or PY3 else bytes +fsnative = _create_fsnative(fsnative_type) + + +def _typecheck_fsnative(path): + """ + Args: + path (object) + Returns: + bool: if path is a fsnative + """ + + if not isinstance(path, fsnative_type): + return False + + if PY3 or is_win: + if u"\x00" in path: + return False + + if is_unix and not _is_unicode_encoding: + try: + path.encode(_encoding, "surrogateescape") + except UnicodeEncodeError: + return False + elif b"\x00" in path: + return False + + return True + + +def _fsn2native(path): + """ + Args: + path (fsnative) + Returns: + `text` on Windows, `bytes` on Unix + Raises: + TypeError: in case the type is wrong or the `str` on Py3 + Unix + can't be converted to `bytes` + + This helper allows to validate the type and content of a path.
+ To reduce overhead the encoded value for Py3 + Unix is returned so + it can be reused. + """ + + if not isinstance(path, fsnative_type): + raise TypeError("path needs to be %s, not %s" % ( + fsnative_type.__name__, type(path).__name__)) + + if is_unix: + if PY3: + try: + path = path.encode(_encoding, "surrogateescape") + except UnicodeEncodeError: + assert not _is_unicode_encoding + # This look more like ValueError, but raising only one error + # makes things simpler... also one could say str + surrogates + # is its own type + raise TypeError( + "path contained Unicode code points not valid in" + "the current path encoding. To create a valid " + "path from Unicode use text2fsn()") + + if b"\x00" in path: + raise TypeError("fsnative can't contain nulls") + else: + if u"\x00" in path: + raise TypeError("fsnative can't contain nulls") + + return path + + +def _get_encoding(): + """The encoding used for paths, argv, environ, stdout and stdin""" + + encoding = sys.getfilesystemencoding() + if encoding is None: + if is_darwin: + encoding = "utf-8" + elif is_win: + encoding = "mbcs" + else: + encoding = "ascii" + encoding = _normalize_codec(encoding) + return encoding + + +_encoding = _get_encoding() +_is_unicode_encoding = _encoding.startswith("utf") + + +def path2fsn(path): + """ + Args: + path (pathlike): The path to convert + Returns: + `fsnative` + Raises: + TypeError: In case the type can't be converted to a `fsnative` + ValueError: In case conversion fails + + Returns a `fsnative` path for a `pathlike`. 
+ """ + + # allow mbcs str on py2+win and bytes on py3 + if PY2: + if is_win: + if isinstance(path, bytes): + path = path.decode(_encoding) + else: + if isinstance(path, text_type): + path = path.encode(_encoding) + if "\x00" in path: + raise ValueError("embedded null") + else: + path = getattr(os, "fspath", lambda x: x)(path) + if isinstance(path, bytes): + if b"\x00" in path: + raise ValueError("embedded null") + path = path.decode(_encoding, "surrogateescape") + elif is_unix and isinstance(path, str): + # make sure we can encode it and this is not just some random + # unicode string + data = path.encode(_encoding, "surrogateescape") + if b"\x00" in data: + raise ValueError("embedded null") + else: + if u"\x00" in path: + raise ValueError("embedded null") + + if not isinstance(path, fsnative_type): + raise TypeError("path needs to be %s", fsnative_type.__name__) + + return path + + +def fsn2text(path, strict=False): + """ + Args: + path (fsnative): The path to convert + strict (bool): Fail in case the conversion is not reversible + Returns: + `text` + Raises: + TypeError: In case no `fsnative` has been passed + ValueError: In case ``strict`` was True and the conversion failed + + Converts a `fsnative` path to `text`. + + Can be used to pass a path to some unicode API, like for example a GUI + toolkit. + + If ``strict`` is True the conversion will fail in case it is not + reversible. This can be useful for converting program arguments that are + supposed to be text and erroring out in case they are not. + + Encoding with a Unicode encoding will always succeed with the result. 
+ """ + + path = _fsn2native(path) + + errors = "strict" if strict else "replace" + + if is_win: + return path.encode("utf-16-le", _surrogatepass).decode("utf-16-le", + errors) + else: + return path.decode(_encoding, errors) + + +def text2fsn(text): + """ + Args: + text (text): The text to convert + Returns: + `fsnative` + Raises: + TypeError: In case no `text` has been passed + + Takes `text` and converts it to a `fsnative`. + + This operation is not reversible and can't fail. + """ + + return fsnative(text) + + +def fsn2bytes(path, encoding): + """ + Args: + path (fsnative): The path to convert + encoding (`str` or `None`): `None` if you don't care about Windows + Returns: + `bytes` + Raises: + TypeError: If no `fsnative` path is passed + ValueError: If encoding fails or no encoding is given + + Converts a `fsnative` path to `bytes`. + + The passed *encoding* is only used on platforms where paths are not + associated with an encoding (Windows for example). If you don't care about + Windows you can pass `None`. + + For Windows paths, lone surrogates will be encoded like normal code points + and surrogate pairs will be merged before encoding. In case of ``utf-8`` + or ``utf-16-le`` this is equal to the `WTF-8 and WTF-16 encoding + `__. + """ + + path = _fsn2native(path) + + if is_win: + if encoding is None: + raise ValueError("invalid encoding %r" % encoding) + + try: + return _winpath2bytes(path, encoding) + except LookupError: + raise ValueError("invalid encoding %r" % encoding) + else: + return path + + +def bytes2fsn(data, encoding): + """ + Args: + data (bytes): The data to convert + encoding (`str` or `None`): `None` if you don't care about Windows + Returns: + `fsnative` + Raises: + TypeError: If no `bytes` path is passed + ValueError: If decoding fails or no encoding is given + + Turns `bytes` to a `fsnative` path. + + The passed *encoding* is only used on platforms where paths are not + associated with an encoding (Windows for example). 
If you don't care about + Windows you can pass `None`. + """ + + if not isinstance(data, bytes): + raise TypeError("data needs to be bytes") + + if is_win: + if encoding is None: + raise ValueError("invalid encoding %r" % encoding) + try: + path = _bytes2winpath(data, encoding) + except LookupError: + raise ValueError("invalid encoding %r" % encoding) + if u"\x00" in path: + raise ValueError("contains nulls") + return path + else: + if b"\x00" in data: + raise ValueError("contains nulls") + if PY2: + return data + else: + return data.decode(_encoding, "surrogateescape") + + +def uri2fsn(uri): + """ + Args: + uri (`text` or :obj:`python:str`): A file URI + Returns: + `fsnative` + Raises: + TypeError: In case an invalid type is passed + ValueError: In case the URI isn't a valid file URI + + Takes a file URI and returns a `fsnative` path + """ + + if PY2: + if isinstance(uri, text_type): + uri = uri.encode("utf-8") + if not isinstance(uri, bytes): + raise TypeError("uri needs to be ascii str or unicode") + else: + if not isinstance(uri, str): + raise TypeError("uri needs to be str") + + parsed = urlparse(uri) + scheme = parsed.scheme + netloc = parsed.netloc + path = parsed.path + + if scheme != "file": + raise ValueError("Not a file URI: %r" % uri) + + if not path: + raise ValueError("Invalid file URI: %r" % uri) + + uri = urlunparse(parsed)[7:] + + if is_win: + path = url2pathname(uri) + if netloc: + path = "\\\\" + path + if PY2: + path = path.decode("utf-8") + if u"\x00" in path: + raise ValueError("embedded null") + return path + else: + path = url2pathname(uri) + if "\x00" in path: + raise ValueError("embedded null") + if PY3: + path = fsnative(path) + return path + + +def fsn2uri(path): + """ + Args: + path (fsnative): The path to convert to an URI + Returns: + `text`: An ASCII only URI + Raises: + TypeError: If no `fsnative` was passed + ValueError: If the path can't be converted + + Takes a `fsnative` path and returns a file URI. 
+ + On Windows non-ASCII characters will be encoded using utf-8 and then + percent encoded. + """ + + path = _fsn2native(path) + + def _quote_path(path): + # RFC 2396 + path = quote(path, "/:@&=+$,") + if PY2: + path = path.decode("ascii") + return path + + if is_win: + buf = ctypes.create_unicode_buffer(winapi.INTERNET_MAX_URL_LENGTH) + length = winapi.DWORD(winapi.INTERNET_MAX_URL_LENGTH) + flags = 0 + try: + winapi.UrlCreateFromPathW(path, buf, ctypes.byref(length), flags) + except WindowsError as e: + raise ValueError(e) + uri = buf[:length.value] + + # For some reason UrlCreateFromPathW escapes some chars outside of + # ASCII and some not. Unquote and re-quote with utf-8. + if PY3: + # latin-1 maps code points directly to bytes, which is what we want + uri = unquote(uri, "latin-1") + else: + # Python 2 does what we want by default + uri = unquote(uri) + + return _quote_path(uri.encode("utf-8", _surrogatepass)) + + else: + return u"file://" + _quote_path(path) diff --git a/lib/mutagen/_senf/_print.py b/lib/mutagen/_senf/_print.py new file mode 100755 index 00000000..051118e5 --- /dev/null +++ b/lib/mutagen/_senf/_print.py @@ -0,0 +1,353 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +import sys +import os +import ctypes + +from ._fsnative import _encoding, is_win, is_unix, _surrogatepass +from ._compat import text_type, PY2, PY3 +from ._winansi import AnsiState, ansi_split +from . import _winapi as winapi + + +def print_(*objects, **kwargs): + """print_(*objects, sep=None, end=None, file=None, flush=False) + + Args: + objects (object): zero or more objects to print + sep (str): Object separator to use, defaults to ``" "`` + end (str): Trailing string to use, defaults to ``"\\n"``. + If end is ``"\\n"`` then `os.linesep` is used. + file (object): A file-like object, defaults to `sys.stdout` + flush (bool): If the file stream should be flushed + Raises: + EnvironmentError + + Like print(), but: + + * Supports printing filenames under Unix + Python 3 and Windows + Python 2 + * Emulates ANSI escape sequence support under Windows + * Never fails due to encoding/decoding errors. Tries hard to get everything + on screen as is, but will fall back to "?" if all fails. + + This does not conflict with ``colorama``, but will not use it on Windows. 
+ """ + + sep = kwargs.get("sep") + sep = sep if sep is not None else " " + end = kwargs.get("end") + end = end if end is not None else "\n" + file = kwargs.get("file") + file = file if file is not None else sys.stdout + flush = bool(kwargs.get("flush", False)) + + if is_win: + _print_windows(objects, sep, end, file, flush) + else: + _print_unix(objects, sep, end, file, flush) + + +def _print_unix(objects, sep, end, file, flush): + """A print_() implementation which writes bytes""" + + encoding = _encoding + + if isinstance(sep, text_type): + sep = sep.encode(encoding, "replace") + if not isinstance(sep, bytes): + raise TypeError + + if isinstance(end, text_type): + end = end.encode(encoding, "replace") + if not isinstance(end, bytes): + raise TypeError + + if end == b"\n": + end = os.linesep + if PY3: + end = end.encode("ascii") + + parts = [] + for obj in objects: + if not isinstance(obj, text_type) and not isinstance(obj, bytes): + obj = text_type(obj) + if isinstance(obj, text_type): + if PY2: + obj = obj.encode(encoding, "replace") + else: + try: + obj = obj.encode(encoding, "surrogateescape") + except UnicodeEncodeError: + obj = obj.encode(encoding, "replace") + assert isinstance(obj, bytes) + parts.append(obj) + + data = sep.join(parts) + end + assert isinstance(data, bytes) + + file = getattr(file, "buffer", file) + + try: + file.write(data) + except TypeError: + if PY3: + # For StringIO, first try with surrogates + surr_data = data.decode(encoding, "surrogateescape") + try: + file.write(surr_data) + except (TypeError, ValueError): + file.write(data.decode(encoding, "replace")) + else: + # for file like objects with don't support bytes + file.write(data.decode(encoding, "replace")) + + if flush: + file.flush() + + +ansi_state = AnsiState() + + +def _print_windows(objects, sep, end, file, flush): + """The windows implementation of print_()""" + + h = winapi.INVALID_HANDLE_VALUE + + try: + fileno = file.fileno() + except (EnvironmentError, AttributeError): + 
pass + else: + if fileno == 1: + h = winapi.GetStdHandle(winapi.STD_OUTPUT_HANDLE) + elif fileno == 2: + h = winapi.GetStdHandle(winapi.STD_ERROR_HANDLE) + + encoding = _encoding + + parts = [] + for obj in objects: + if isinstance(obj, bytes): + obj = obj.decode(encoding, "replace") + if not isinstance(obj, text_type): + obj = text_type(obj) + parts.append(obj) + + if isinstance(sep, bytes): + sep = sep.decode(encoding, "replace") + if not isinstance(sep, text_type): + raise TypeError + + if isinstance(end, bytes): + end = end.decode(encoding, "replace") + if not isinstance(end, text_type): + raise TypeError + + if end == u"\n": + end = os.linesep + + text = sep.join(parts) + end + assert isinstance(text, text_type) + + is_console = True + if h == winapi.INVALID_HANDLE_VALUE: + is_console = False + else: + # get the default value + info = winapi.CONSOLE_SCREEN_BUFFER_INFO() + if not winapi.GetConsoleScreenBufferInfo(h, ctypes.byref(info)): + is_console = False + + if is_console: + # make sure we flush before we apply any console attributes + file.flush() + + # try to force a utf-8 code page, use the output CP if that fails + cp = winapi.GetConsoleOutputCP() + try: + encoding = "utf-8" + if winapi.SetConsoleOutputCP(65001) == 0: + encoding = None + + for is_ansi, part in ansi_split(text): + if is_ansi: + ansi_state.apply(h, part) + else: + if encoding is not None: + data = part.encode(encoding, _surrogatepass) + else: + data = _encode_codepage(cp, part) + os.write(fileno, data) + finally: + # reset the code page to what we had before + winapi.SetConsoleOutputCP(cp) + else: + # try writing bytes first, so in case of Python 2 StringIO we get + # the same type on all platforms + try: + file.write(text.encode("utf-8", _surrogatepass)) + except (TypeError, ValueError): + file.write(text) + + if flush: + file.flush() + + +def _readline_windows(): + """Raises OSError""" + + try: + fileno = sys.stdin.fileno() + except (EnvironmentError, AttributeError): + fileno = -1 + + # 
In case stdin is replaced, read from that + if fileno != 0: + return _readline_windows_fallback() + + h = winapi.GetStdHandle(winapi.STD_INPUT_HANDLE) + if h == winapi.INVALID_HANDLE_VALUE: + return _readline_windows_fallback() + + buf_size = 1024 + buf = ctypes.create_string_buffer(buf_size * ctypes.sizeof(winapi.WCHAR)) + read = winapi.DWORD() + + text = u"" + while True: + if winapi.ReadConsoleW( + h, buf, buf_size, ctypes.byref(read), None) == 0: + if not text: + return _readline_windows_fallback() + raise ctypes.WinError() + data = buf[:read.value * ctypes.sizeof(winapi.WCHAR)] + text += data.decode("utf-16-le", _surrogatepass) + if text.endswith(u"\r\n"): + return text[:-2] + + +def _decode_codepage(codepage, data): + """ + Args: + codepage (int) + data (bytes) + Returns: + `text` + + Decodes data using the given codepage. If some data can't be decoded + using the codepage it will not fail. + """ + + assert isinstance(data, bytes) + + if not data: + return u"" + + # get the required buffer length first + length = winapi.MultiByteToWideChar(codepage, 0, data, len(data), None, 0) + if length == 0: + raise ctypes.WinError() + + # now decode + buf = ctypes.create_unicode_buffer(length) + length = winapi.MultiByteToWideChar( + codepage, 0, data, len(data), buf, length) + if length == 0: + raise ctypes.WinError() + + return buf[:] + + +def _encode_codepage(codepage, text): + """ + Args: + codepage (int) + text (text) + Returns: + `bytes` + + Encode text using the given code page. Will not fail if a char + can't be encoded using that codepage. 
+ """ + + assert isinstance(text, text_type) + + if not text: + return b"" + + size = (len(text.encode("utf-16-le", _surrogatepass)) // + ctypes.sizeof(winapi.WCHAR)) + + # get the required buffer size + length = winapi.WideCharToMultiByte( + codepage, 0, text, size, None, 0, None, None) + if length == 0: + raise ctypes.WinError() + + # decode to the buffer + buf = ctypes.create_string_buffer(length) + length = winapi.WideCharToMultiByte( + codepage, 0, text, size, buf, length, None, None) + if length == 0: + raise ctypes.WinError() + return buf[:length] + + +def _readline_windows_fallback(): + # In case reading from the console failed (maybe we get piped data) + # we assume the input was generated according to the output encoding. + # Got any better ideas? + assert is_win + cp = winapi.GetConsoleOutputCP() + data = getattr(sys.stdin, "buffer", sys.stdin).readline().rstrip(b"\r\n") + return _decode_codepage(cp, data) + + +def _readline_default(): + assert is_unix + data = getattr(sys.stdin, "buffer", sys.stdin).readline().rstrip(b"\r\n") + if PY3: + return data.decode(_encoding, "surrogateescape") + else: + return data + + +def _readline(): + if is_win: + return _readline_windows() + else: + return _readline_default() + + +def input_(prompt=None): + """ + Args: + prompt (object): Prints the passed object to stdout without + adding a trailing newline + Returns: + `fsnative` + Raises: + EnvironmentError + + Like :func:`python3:input` but returns a `fsnative` and allows printing + filenames as prompt to stdout. + + Use :func:`fsn2text` on the result if you just want to deal with text. 
+ """ + + if prompt is not None: + print_(prompt, end="") + + return _readline() diff --git a/lib/mutagen/_senf/_stdlib.py b/lib/mutagen/_senf/_stdlib.py new file mode 100755 index 00000000..82415035 --- /dev/null +++ b/lib/mutagen/_senf/_stdlib.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +import re +import os + +from ._fsnative import path2fsn, fsnative, is_win +from ._compat import PY2 +from ._environ import environ + + +sep = path2fsn(os.sep) +pathsep = path2fsn(os.pathsep) +curdir = path2fsn(os.curdir) +pardir = path2fsn(os.pardir) +altsep = path2fsn(os.altsep) if os.altsep is not None else None +extsep = path2fsn(os.extsep) +devnull = path2fsn(os.devnull) +defpath = path2fsn(os.defpath) + + +def getcwd(): + """Like `os.getcwd` but returns a `fsnative` path + + Returns: + `fsnative` + """ + + if is_win and PY2: + return os.getcwdu() + return os.getcwd() + + +def _get_userdir(user=None): + """Returns the user dir or None""" + + if user is not None and not isinstance(user, fsnative): + raise TypeError + + if is_win: + if "HOME" in environ: + path = environ["HOME"] + elif "USERPROFILE" in environ: + path = environ["USERPROFILE"] + elif "HOMEPATH" in environ and "HOMEDRIVE" in environ: + path = os.path.join(environ["HOMEDRIVE"], environ["HOMEPATH"]) + else: + return + + if user is None: + return path + else: + return 
os.path.join(os.path.dirname(path), user) + else: + import pwd + + if user is None: + if "HOME" in environ: + return environ["HOME"] + else: + try: + return path2fsn(pwd.getpwuid(os.getuid()).pw_dir) + except KeyError: + return + else: + try: + return path2fsn(pwd.getpwnam(user).pw_dir) + except KeyError: + return + + +def expanduser(path): + """ + Args: + path (pathlike): A path to expand + Returns: + `fsnative` + + Like :func:`python:os.path.expanduser` but supports unicode home + directories under Windows + Python 2 and always returns a `fsnative`. + """ + + path = path2fsn(path) + + if path == "~": + return _get_userdir() + elif path.startswith("~" + sep) or ( + altsep is not None and path.startswith("~" + altsep)): + userdir = _get_userdir() + if userdir is None: + return path + return userdir + path[1:] + elif path.startswith("~"): + sep_index = path.find(sep) + if altsep is not None: + alt_index = path.find(altsep) + if alt_index != -1 and alt_index < sep_index: + sep_index = alt_index + + if sep_index == -1: + user = path[1:] + rest = "" + else: + user = path[1:sep_index] + rest = path[sep_index:] + + userdir = _get_userdir(user) + if userdir is not None: + return userdir + rest + else: + return path + else: + return path + + +def expandvars(path): + """ + Args: + path (pathlike): A path to expand + Returns: + `fsnative` + + Like :func:`python:os.path.expandvars` but supports unicode under Windows + + Python 2 and always returns a `fsnative`. 
+ """ + + path = path2fsn(path) + + def repl_func(match): + return environ.get(match.group(1), match.group(0)) + + path = re.compile(r"\$(\w+)", flags=re.UNICODE).sub(repl_func, path) + if os.name == "nt": + path = re.sub(r"%([^%]+)%", repl_func, path) + return re.sub(r"\$\{([^\}]+)\}", repl_func, path) diff --git a/lib/mutagen/_senf/_temp.py b/lib/mutagen/_senf/_temp.py new file mode 100755 index 00000000..ac44dfba --- /dev/null +++ b/lib/mutagen/_senf/_temp.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +import tempfile + +from ._fsnative import path2fsn, fsnative + + +def gettempdir(): + """ + Returns: + `fsnative` + + Like :func:`python3:tempfile.gettempdir`, but always returns a `fsnative` + path + """ + + # FIXME: I don't want to reimplement all that logic, reading env vars etc. + # At least for the default it works. 
+ return path2fsn(tempfile.gettempdir()) + + +def gettempprefix(): + """ + Returns: + `fsnative` + + Like :func:`python3:tempfile.gettempprefix`, but always returns a + `fsnative` path + """ + + return path2fsn(tempfile.gettempprefix()) + + +def mkstemp(suffix=None, prefix=None, dir=None, text=False): + """ + Args: + suffix (`pathlike` or `None`): suffix or `None` to use the default + prefix (`pathlike` or `None`): prefix or `None` to use the default + dir (`pathlike` or `None`): temp dir or `None` to use the default + text (bool): if the file should be opened in text mode + Returns: + Tuple[`int`, `fsnative`]: + A tuple containing the file descriptor and the file path + Raises: + EnvironmentError + + Like :func:`python3:tempfile.mkstemp` but always returns a `fsnative` + path. + """ + + suffix = fsnative() if suffix is None else path2fsn(suffix) + prefix = gettempprefix() if prefix is None else path2fsn(prefix) + dir = gettempdir() if dir is None else path2fsn(dir) + + return tempfile.mkstemp(suffix, prefix, dir, text) + + +def mkdtemp(suffix=None, prefix=None, dir=None): + """ + Args: + suffix (`pathlike` or `None`): suffix or `None` to use the default + prefix (`pathlike` or `None`): prefix or `None` to use the default + dir (`pathlike` or `None`): temp dir or `None` to use the default + Returns: + `fsnative`: A path to a directory + Raises: + EnvironmentError + + Like :func:`python3:tempfile.mkstemp` but always returns a `fsnative` path. 
+ """ + + suffix = fsnative() if suffix is None else path2fsn(suffix) + prefix = gettempprefix() if prefix is None else path2fsn(prefix) + dir = gettempdir() if dir is None else path2fsn(dir) + + return tempfile.mkdtemp(suffix, prefix, dir) diff --git a/lib/mutagen/_senf/_winansi.py b/lib/mutagen/_senf/_winansi.py new file mode 100755 index 00000000..edcd01cf --- /dev/null +++ b/lib/mutagen/_senf/_winansi.py @@ -0,0 +1,311 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +import ctypes +import re +import atexit + +from . 
import _winapi as winapi + + +def ansi_parse(code): + """Returns command, (args)""" + + return code[-1:], tuple([int(v or "0") for v in code[2:-1].split(";")]) + + +def ansi_split(text, _re=re.compile(u"(\x1b\[(\d*;?)*\S)")): + """Yields (is_ansi, text)""" + + for part in _re.split(text): + if part: + yield (bool(_re.match(part)), part) + + +class AnsiCommand(object): + TEXT = "m" + + MOVE_UP = "A" + MOVE_DOWN = "B" + MOVE_FORWARD = "C" + MOVE_BACKWARD = "D" + + SET_POS = "H" + SET_POS_ALT = "f" + + SAVE_POS = "s" + RESTORE_POS = "u" + + +class TextAction(object): + RESET_ALL = 0 + + SET_BOLD = 1 + SET_DIM = 2 + SET_ITALIC = 3 + SET_UNDERLINE = 4 + SET_BLINK = 5 + SET_BLINK_FAST = 6 + SET_REVERSE = 7 + SET_HIDDEN = 8 + + RESET_BOLD = 21 + RESET_DIM = 22 + RESET_ITALIC = 23 + RESET_UNDERLINE = 24 + RESET_BLINK = 25 + RESET_BLINK_FAST = 26 + RESET_REVERSE = 27 + RESET_HIDDEN = 28 + + FG_BLACK = 30 + FG_RED = 31 + FG_GREEN = 32 + FG_YELLOW = 33 + FG_BLUE = 34 + FG_MAGENTA = 35 + FG_CYAN = 36 + FG_WHITE = 37 + + FG_DEFAULT = 39 + + FG_LIGHT_BLACK = 90 + FG_LIGHT_RED = 91 + FG_LIGHT_GREEN = 92 + FG_LIGHT_YELLOW = 93 + FG_LIGHT_BLUE = 94 + FG_LIGHT_MAGENTA = 95 + FG_LIGHT_CYAN = 96 + FG_LIGHT_WHITE = 97 + + BG_BLACK = 40 + BG_RED = 41 + BG_GREEN = 42 + BG_YELLOW = 43 + BG_BLUE = 44 + BG_MAGENTA = 45 + BG_CYAN = 46 + BG_WHITE = 47 + + BG_DEFAULT = 49 + + BG_LIGHT_BLACK = 100 + BG_LIGHT_RED = 101 + BG_LIGHT_GREEN = 102 + BG_LIGHT_YELLOW = 103 + BG_LIGHT_BLUE = 104 + BG_LIGHT_MAGENTA = 105 + BG_LIGHT_CYAN = 106 + BG_LIGHT_WHITE = 107 + + +class AnsiState(object): + + def __init__(self): + self.default_attrs = None + + self.bold = False + self.bg_light = False + self.fg_light = False + + self.saved_pos = (0, 0) + + def do_text_action(self, attrs, action): + # In case the external state has changed, apply it it to ours. + # Mostly the first time this is called. 
+ if attrs & winapi.FOREGROUND_INTENSITY and not self.fg_light \ + and not self.bold: + self.fg_light = True + if attrs & winapi.BACKGROUND_INTENSITY and not self.bg_light: + self.bg_light = True + + dark_fg = { + TextAction.FG_BLACK: 0, + TextAction.FG_RED: winapi.FOREGROUND_RED, + TextAction.FG_GREEN: winapi.FOREGROUND_GREEN, + TextAction.FG_YELLOW: + winapi.FOREGROUND_GREEN | winapi.FOREGROUND_RED, + TextAction.FG_BLUE: winapi.FOREGROUND_BLUE, + TextAction.FG_MAGENTA: winapi.FOREGROUND_BLUE | + winapi.FOREGROUND_RED, + TextAction.FG_CYAN: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN, + TextAction.FG_WHITE: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN | + winapi.FOREGROUND_RED, + } + + dark_bg = { + TextAction.BG_BLACK: 0, + TextAction.BG_RED: winapi.BACKGROUND_RED, + TextAction.BG_GREEN: winapi.BACKGROUND_GREEN, + TextAction.BG_YELLOW: + winapi.BACKGROUND_GREEN | winapi.BACKGROUND_RED, + TextAction.BG_BLUE: winapi.BACKGROUND_BLUE, + TextAction.BG_MAGENTA: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_RED, + TextAction.BG_CYAN: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN, + TextAction.BG_WHITE: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN | + winapi.BACKGROUND_RED, + } + + light_fg = { + TextAction.FG_LIGHT_BLACK: 0, + TextAction.FG_LIGHT_RED: winapi.FOREGROUND_RED, + TextAction.FG_LIGHT_GREEN: winapi.FOREGROUND_GREEN, + TextAction.FG_LIGHT_YELLOW: + winapi.FOREGROUND_GREEN | winapi.FOREGROUND_RED, + TextAction.FG_LIGHT_BLUE: winapi.FOREGROUND_BLUE, + TextAction.FG_LIGHT_MAGENTA: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_RED, + TextAction.FG_LIGHT_CYAN: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN, + TextAction.FG_LIGHT_WHITE: + winapi.FOREGROUND_BLUE | winapi.FOREGROUND_GREEN | + winapi.FOREGROUND_RED, + } + + light_bg = { + TextAction.BG_LIGHT_BLACK: 0, + TextAction.BG_LIGHT_RED: winapi.BACKGROUND_RED, + TextAction.BG_LIGHT_GREEN: winapi.BACKGROUND_GREEN, + TextAction.BG_LIGHT_YELLOW: + winapi.BACKGROUND_GREEN | 
winapi.BACKGROUND_RED, + TextAction.BG_LIGHT_BLUE: winapi.BACKGROUND_BLUE, + TextAction.BG_LIGHT_MAGENTA: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_RED, + TextAction.BG_LIGHT_CYAN: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN, + TextAction.BG_LIGHT_WHITE: + winapi.BACKGROUND_BLUE | winapi.BACKGROUND_GREEN | + winapi.BACKGROUND_RED, + } + + if action == TextAction.RESET_ALL: + attrs = self.default_attrs + self.bold = self.fg_light = self.bg_light = False + elif action == TextAction.SET_BOLD: + self.bold = True + elif action == TextAction.RESET_BOLD: + self.bold = False + elif action == TextAction.SET_DIM: + self.bold = False + elif action == TextAction.SET_REVERSE: + attrs |= winapi.COMMON_LVB_REVERSE_VIDEO + elif action == TextAction.RESET_REVERSE: + attrs &= ~winapi.COMMON_LVB_REVERSE_VIDEO + elif action == TextAction.SET_UNDERLINE: + attrs |= winapi.COMMON_LVB_UNDERSCORE + elif action == TextAction.RESET_UNDERLINE: + attrs &= ~winapi.COMMON_LVB_UNDERSCORE + elif action == TextAction.FG_DEFAULT: + attrs = (attrs & ~0xF) | (self.default_attrs & 0xF) + self.fg_light = False + elif action == TextAction.BG_DEFAULT: + attrs = (attrs & ~0xF0) | (self.default_attrs & 0xF0) + self.bg_light = False + elif action in dark_fg: + attrs = (attrs & ~0xF) | dark_fg[action] + self.fg_light = False + elif action in dark_bg: + attrs = (attrs & ~0xF0) | dark_bg[action] + self.bg_light = False + elif action in light_fg: + attrs = (attrs & ~0xF) | light_fg[action] + self.fg_light = True + elif action in light_bg: + attrs = (attrs & ~0xF0) | light_bg[action] + self.bg_light = True + + if self.fg_light or self.bold: + attrs |= winapi.FOREGROUND_INTENSITY + else: + attrs &= ~winapi.FOREGROUND_INTENSITY + + if self.bg_light: + attrs |= winapi.BACKGROUND_INTENSITY + else: + attrs &= ~winapi.BACKGROUND_INTENSITY + + return attrs + + def apply(self, handle, code): + buffer_info = winapi.CONSOLE_SCREEN_BUFFER_INFO() + if not winapi.GetConsoleScreenBufferInfo(handle, + 
ctypes.byref(buffer_info)): + return + + attrs = buffer_info.wAttributes + + # We take the first attrs we see as default + if self.default_attrs is None: + self.default_attrs = attrs + # Make sure that like with linux terminals the program doesn't + # affect the prompt after it exits + atexit.register( + winapi.SetConsoleTextAttribute, handle, self.default_attrs) + + cmd, args = ansi_parse(code) + if cmd == AnsiCommand.TEXT: + for action in args: + attrs = self.do_text_action(attrs, action) + winapi.SetConsoleTextAttribute(handle, attrs) + elif cmd in (AnsiCommand.MOVE_UP, AnsiCommand.MOVE_DOWN, + AnsiCommand.MOVE_FORWARD, AnsiCommand.MOVE_BACKWARD): + + coord = buffer_info.dwCursorPosition + x, y = coord.X, coord.Y + + amount = max(args[0], 1) + + if cmd == AnsiCommand.MOVE_UP: + y -= amount + elif cmd == AnsiCommand.MOVE_DOWN: + y += amount + elif cmd == AnsiCommand.MOVE_FORWARD: + x += amount + elif cmd == AnsiCommand.MOVE_BACKWARD: + x -= amount + + x = max(x, 0) + y = max(y, 0) + winapi.SetConsoleCursorPosition(handle, winapi.COORD(x, y)) + elif cmd in (AnsiCommand.SET_POS, AnsiCommand.SET_POS_ALT): + args = list(args) + while len(args) < 2: + args.append(0) + x, y = args[:2] + + win_rect = buffer_info.srWindow + x += win_rect.Left - 1 + y += win_rect.Top - 1 + + x = max(x, 0) + y = max(y, 0) + winapi.SetConsoleCursorPosition(handle, winapi.COORD(x, y)) + elif cmd == AnsiCommand.SAVE_POS: + win_rect = buffer_info.srWindow + coord = buffer_info.dwCursorPosition + x, y = coord.X, coord.Y + x -= win_rect.Left + y -= win_rect.Top + self.saved_pos = (x, y) + elif cmd == AnsiCommand.RESTORE_POS: + win_rect = buffer_info.srWindow + x, y = self.saved_pos + x += win_rect.Left + y += win_rect.Top + winapi.SetConsoleCursorPosition(handle, winapi.COORD(x, y)) diff --git a/lib/mutagen/_senf/_winapi.py b/lib/mutagen/_senf/_winapi.py new file mode 100755 index 00000000..effb85bb --- /dev/null +++ b/lib/mutagen/_senf/_winapi.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +# 
Copyright 2016 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +import ctypes +from ctypes import WinDLL, wintypes + + +shell32 = WinDLL("shell32") +kernel32 = WinDLL("kernel32") +shlwapi = WinDLL("shlwapi") + +GetCommandLineW = kernel32.GetCommandLineW +GetCommandLineW.argtypes = [] +GetCommandLineW.restype = wintypes.LPCWSTR + +CommandLineToArgvW = shell32.CommandLineToArgvW +CommandLineToArgvW.argtypes = [ + wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_int)] +CommandLineToArgvW.restype = ctypes.POINTER(wintypes.LPWSTR) + +LocalFree = kernel32.LocalFree +LocalFree.argtypes = [wintypes.HLOCAL] +LocalFree.restype = wintypes.HLOCAL + +# https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751.aspx +LPCTSTR = ctypes.c_wchar_p +LPWSTR = wintypes.LPWSTR +LPCWSTR = ctypes.c_wchar_p +LPTSTR = LPWSTR +PCWSTR = ctypes.c_wchar_p +PCTSTR = PCWSTR +PWSTR = ctypes.c_wchar_p +PTSTR = PWSTR +LPVOID = wintypes.LPVOID +WCHAR = wintypes.WCHAR +LPSTR = ctypes.c_char_p + +BOOL = wintypes.BOOL +LPBOOL = ctypes.POINTER(BOOL) +UINT = wintypes.UINT +WORD = wintypes.WORD +DWORD = wintypes.DWORD +SHORT = wintypes.SHORT +HANDLE = wintypes.HANDLE +ULONG = wintypes.ULONG +LPCSTR = wintypes.LPCSTR + +STD_INPUT_HANDLE = DWORD(-10) +STD_OUTPUT_HANDLE = DWORD(-11) +STD_ERROR_HANDLE = DWORD(-12) + +INVALID_HANDLE_VALUE = wintypes.HANDLE(-1).value + +INTERNET_MAX_SCHEME_LENGTH = 32 +INTERNET_MAX_PATH_LENGTH = 2048 +INTERNET_MAX_URL_LENGTH 
= ( + INTERNET_MAX_SCHEME_LENGTH + len("://") + INTERNET_MAX_PATH_LENGTH) + +FOREGROUND_BLUE = 0x0001 +FOREGROUND_GREEN = 0x0002 +FOREGROUND_RED = 0x0004 +FOREGROUND_INTENSITY = 0x0008 + +BACKGROUND_BLUE = 0x0010 +BACKGROUND_GREEN = 0x0020 +BACKGROUND_RED = 0x0040 +BACKGROUND_INTENSITY = 0x0080 + +COMMON_LVB_REVERSE_VIDEO = 0x4000 +COMMON_LVB_UNDERSCORE = 0x8000 + +UrlCreateFromPathW = shlwapi.UrlCreateFromPathW +UrlCreateFromPathW.argtypes = [ + PCTSTR, PTSTR, ctypes.POINTER(DWORD), DWORD] +UrlCreateFromPathW.restype = ctypes.HRESULT + +SetEnvironmentVariableW = kernel32.SetEnvironmentVariableW +SetEnvironmentVariableW.argtypes = [LPCTSTR, LPCTSTR] +SetEnvironmentVariableW.restype = wintypes.BOOL + +GetEnvironmentVariableW = kernel32.GetEnvironmentVariableW +GetEnvironmentVariableW.argtypes = [LPCTSTR, LPTSTR, DWORD] +GetEnvironmentVariableW.restype = DWORD + +GetEnvironmentStringsW = kernel32.GetEnvironmentStringsW +GetEnvironmentStringsW.argtypes = [] +GetEnvironmentStringsW.restype = ctypes.c_void_p + +FreeEnvironmentStringsW = kernel32.FreeEnvironmentStringsW +FreeEnvironmentStringsW.argtypes = [ctypes.c_void_p] +FreeEnvironmentStringsW.restype = ctypes.c_bool + +GetStdHandle = kernel32.GetStdHandle +GetStdHandle.argtypes = [DWORD] +GetStdHandle.restype = HANDLE + + +class COORD(ctypes.Structure): + + _fields_ = [ + ("X", SHORT), + ("Y", SHORT), + ] + + +class SMALL_RECT(ctypes.Structure): + + _fields_ = [ + ("Left", SHORT), + ("Top", SHORT), + ("Right", SHORT), + ("Bottom", SHORT), + ] + + +class CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure): + + _fields_ = [ + ("dwSize", COORD), + ("dwCursorPosition", COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", COORD), + ] + + +GetConsoleScreenBufferInfo = kernel32.GetConsoleScreenBufferInfo +GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO)] +GetConsoleScreenBufferInfo.restype = BOOL + +GetConsoleOutputCP = kernel32.GetConsoleOutputCP 
+GetConsoleOutputCP.argtypes = [] +GetConsoleOutputCP.restype = UINT + +SetConsoleOutputCP = kernel32.SetConsoleOutputCP +SetConsoleOutputCP.argtypes = [UINT] +SetConsoleOutputCP.restype = BOOL + +GetConsoleCP = kernel32.GetConsoleCP +GetConsoleCP.argtypes = [] +GetConsoleCP.restype = UINT + +SetConsoleCP = kernel32.SetConsoleCP +SetConsoleCP.argtypes = [UINT] +SetConsoleCP.restype = BOOL + +SetConsoleTextAttribute = kernel32.SetConsoleTextAttribute +SetConsoleTextAttribute.argtypes = [HANDLE, WORD] +SetConsoleTextAttribute.restype = BOOL + +SetConsoleCursorPosition = kernel32.SetConsoleCursorPosition +SetConsoleCursorPosition.argtypes = [HANDLE, COORD] +SetConsoleCursorPosition.restype = BOOL + +ReadConsoleW = kernel32.ReadConsoleW +ReadConsoleW.argtypes = [HANDLE, LPVOID, DWORD, ctypes.POINTER(DWORD), LPVOID] +ReadConsoleW.restype = BOOL + +MultiByteToWideChar = kernel32.MultiByteToWideChar +MultiByteToWideChar.argtypes = [ + UINT, DWORD, LPCSTR, ctypes.c_int, LPWSTR, ctypes.c_int] +MultiByteToWideChar.restype = ctypes.c_int + +WideCharToMultiByte = kernel32.WideCharToMultiByte +WideCharToMultiByte.argtypes = [ + UINT, DWORD, LPCWSTR, ctypes.c_int, LPSTR, ctypes.c_int, LPCSTR, LPBOOL] +WideCharToMultiByte.restype = ctypes.c_int + +MoveFileW = kernel32.MoveFileW +MoveFileW.argtypes = [LPCTSTR, LPCTSTR] +MoveFileW.restype = BOOL diff --git a/lib/mutagen/_tags.py b/lib/mutagen/_tags.py old mode 100644 new mode 100755 index 8f9b9aa9..c3f2ebf6 --- a/lib/mutagen/_tags.py +++ b/lib/mutagen/_tags.py @@ -1,14 +1,107 @@ +# -*- coding: utf-8 -*- # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version.
+ +from ._util import loadfile -class Metadata(object): - """An abstract dict-like object. +class PaddingInfo(object): + """PaddingInfo() - Metadata is the base class for many of the tag objects in Mutagen. + Abstract padding information object. + + This will be passed to the callback function that can be used + for saving tags. + + :: + + def my_callback(info: PaddingInfo): + return info.get_default_padding() + + The callback should return the amount of padding to use (>= 0) based on + the content size and the padding of the file after saving. The actual used + amount of padding might vary depending on the file format (due to + alignment etc.) + + The default implementation can be accessed using the + :meth:`get_default_padding` method in the callback. + + Attributes: + padding (`int`): The amount of padding left after saving in bytes + (can be negative if more data needs to be added as padding is + available) + size (`int`): The amount of data following the padding + """ + + def __init__(self, padding, size): + self.padding = padding + self.size = size + + def get_default_padding(self): + """The default implementation which tries to select a reasonable + amount of padding and which might change in future versions. + + Returns: + int: Amount of padding after saving + """ + + high = 1024 * 10 + self.size // 100 # 10 KiB + 1% of trailing data + low = 1024 + self.size // 1000 # 1 KiB + 0.1% of trailing data + + if self.padding >= 0: + # enough padding left + if self.padding > high: + # padding too large, reduce + return low + # just use existing padding as is + return self.padding + else: + # not enough padding, add some + return low + + def _get_padding(self, user_func): + if user_func is None: + return self.get_default_padding() + else: + return user_func(self) + + def __repr__(self): + return "<%s size=%d padding=%d>" % ( + type(self).__name__, self.size, self.padding) + + +class Tags(object): + """`Tags` is the base class for many of the tag objects in Mutagen. 
+ + In many cases it has a dict like interface. + """ + + __module__ = "mutagen" + + def pprint(self): + """ + Returns: + text: tag information + """ + + raise NotImplementedError + + +class Metadata(Tags): + """Metadata(filething=None, **kwargs) + + Args: + filething (filething): a filename or a file-like object or `None` + to create an empty instance (like ``ID3()``) + + Like :class:`Tags` but for standalone tagging formats that are not + solely managed by a container format. + + Provides methods to load, save and delete tags. """ __module__ = "mutagen" @@ -17,15 +110,37 @@ class Metadata(object): if args or kwargs: self.load(*args, **kwargs) - def load(self, *args, **kwargs): + @loadfile() + def load(self, filething, **kwargs): raise NotImplementedError - def save(self, filename=None): - """Save changes to a file.""" + @loadfile(writable=False) + def save(self, filething, **kwargs): + """save(filething=None, **kwargs) + + Save changes to a file. + + Args: + filething (filething): or `None` + Raises: + MutagenError: if saving wasn't possible + """ raise NotImplementedError - def delete(self, filename=None): - """Remove tags from a file.""" + @loadfile(writable=False) + def delete(self, filething): + """delete(filething=None) + + Remove tags from a file. + + In most cases this means any traces of the tag will be removed + from the file. + + Args: + filething (filething): or `None` + Raises: + MutagenError: if deleting wasn't possible + """ raise NotImplementedError diff --git a/lib/mutagen/_tools/__init__.py b/lib/mutagen/_tools/__init__.py new file mode 100755 index 00000000..3e6b1556 --- /dev/null +++ b/lib/mutagen/_tools/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
diff --git a/lib/mutagen/_tools/_util.py b/lib/mutagen/_tools/_util.py new file mode 100755 index 00000000..4e050769 --- /dev/null +++ b/lib/mutagen/_tools/_util.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import os +import signal +import contextlib +import optparse + +from mutagen._senf import print_ +from mutagen._compat import text_type, iterbytes + + +def split_escape(string, sep, maxsplit=None, escape_char="\\"): + """Like unicode/str/bytes.split but allows for the separator to be escaped + + If passed unicode/str/bytes will only return list of unicode/str/bytes. + """ + + assert len(sep) == 1 + assert len(escape_char) == 1 + + if isinstance(string, bytes): + if isinstance(escape_char, text_type): + escape_char = escape_char.encode("ascii") + iter_ = iterbytes + else: + iter_ = iter + + if maxsplit is None: + maxsplit = len(string) + + empty = string[:0] + result = [] + current = empty + escaped = False + for char in iter_(string): + if escaped: + if char != escape_char and char != sep: + current += escape_char + current += char + escaped = False + else: + if char == escape_char: + escaped = True + elif char == sep and len(result) < maxsplit: + result.append(current) + current = empty + else: + current += char + result.append(current) + return result + + +class SignalHandler(object): + + def __init__(self): + self._interrupted = False + self._nosig = False + self._init = False + + def init(self): + signal.signal(signal.SIGINT, self._handler) + signal.signal(signal.SIGTERM, self._handler) + if os.name != "nt": + signal.signal(signal.SIGHUP, self._handler) + + def _handler(self, signum, frame): + self._interrupted = True + if not self._nosig: + raise 
SystemExit("Aborted...") + + @contextlib.contextmanager + def block(self): + """While this context manager is active any signals for aborting + the process will be queued and exit the program once the context + is left. + """ + + self._nosig = True + yield + self._nosig = False + if self._interrupted: + raise SystemExit("Aborted...") + + +class OptionParser(optparse.OptionParser): + """OptionParser subclass which supports printing Unicode under Windows""" + + def print_help(self, file=None): + print_(self.format_help(), file=file) diff --git a/lib/mutagen/_tools/mid3cp.py b/lib/mutagen/_tools/mid3cp.py new file mode 100755 index 00000000..1339548d --- /dev/null +++ b/lib/mutagen/_tools/mid3cp.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 Marcus Sundman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""A program replicating the functionality of id3lib's id3cp, using mutagen for +tag loading and saving. +""" + +import sys +import os.path + +import mutagen +import mutagen.id3 +from mutagen._senf import print_, argv +from mutagen._compat import text_type + +from ._util import SignalHandler, OptionParser + + +VERSION = (0, 1) +_sig = SignalHandler() + + +def printerr(*args, **kwargs): + kwargs.setdefault("file", sys.stderr) + print_(*args, **kwargs) + + +class ID3OptionParser(OptionParser): + def __init__(self): + mutagen_version = mutagen.version_string + my_version = ".".join(map(str, VERSION)) + version = "mid3cp %s\nUses Mutagen %s" % (my_version, mutagen_version) + self.disable_interspersed_args() + OptionParser.__init__( + self, version=version, + usage="%prog [option(s)] <src> <dst>", + description=("Copies ID3 tags from <src> to <dst>.
Mutagen-based " + "replacement for id3lib's id3cp.")) + + +def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): + """Returns 0 on success""" + + if excluded_tags is None: + excluded_tags = [] + + try: + id3 = mutagen.id3.ID3(src, translate=False) + except mutagen.id3.ID3NoHeaderError: + print_(u"No ID3 header found in ", src, file=sys.stderr) + return 1 + except Exception as err: + print_(str(err), file=sys.stderr) + return 1 + + if verbose: + print_(u"File", src, u"contains:", file=sys.stderr) + print_(id3.pprint(), file=sys.stderr) + + for tag in excluded_tags: + id3.delall(tag) + + if merge: + try: + target = mutagen.id3.ID3(dst, translate=False) + except mutagen.id3.ID3NoHeaderError: + # no need to merge + pass + except Exception as err: + print_(str(err), file=sys.stderr) + return 1 + else: + for frame in id3.values(): + target.add(frame) + + id3 = target + + # if the source is 2.3 save it as 2.3 + if id3.version < (2, 4, 0): + id3.update_to_v23() + v2_version = 3 + else: + id3.update_to_v24() + v2_version = 4 + + try: + id3.save(dst, v1=(2 if write_v1 else 0), v2_version=v2_version) + except Exception as err: + print_(u"Error saving", dst, u":\n%s" % text_type(err), + file=sys.stderr) + return 1 + else: + if verbose: + print_(u"Successfully saved", dst, file=sys.stderr) + return 0 + + +def main(argv): + parser = ID3OptionParser() + parser.add_option("-v", "--verbose", action="store_true", dest="verbose", + help="print out saved tags", default=False) + parser.add_option("--write-v1", action="store_true", dest="write_v1", + default=False, help="write id3v1 tags") + parser.add_option("-x", "--exclude-tag", metavar="TAG", action="append", + dest="x", help="exclude the specified tag", default=[]) + parser.add_option("--merge", action="store_true", + help="Copy over frames instead of the whole ID3 tag", + default=False) + (options, args) = parser.parse_args(argv[1:]) + + if len(args) != 2: + parser.print_help(file=sys.stderr) + return 1 + + 
(src, dst) = args + + if not os.path.isfile(src): + print_(u"File not found:", src, file=sys.stderr) + parser.print_help(file=sys.stderr) + return 1 + + if not os.path.isfile(dst): + printerr(u"File not found:", dst, file=sys.stderr) + parser.print_help(file=sys.stderr) + return 1 + + # Strip tags - "-x FOO" adds whitespace at the beginning of the tag name + excluded_tags = [x.strip() for x in options.x] + + with _sig.block(): + return copy(src, dst, options.merge, options.write_v1, excluded_tags, + options.verbose) + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/lib/mutagen/_tools/mid3iconv.py b/lib/mutagen/_tools/mid3iconv.py new file mode 100755 index 00000000..f2d725a2 --- /dev/null +++ b/lib/mutagen/_tools/mid3iconv.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# Copyright 2006 Emfox Zhou +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +""" +ID3iconv is a Java based ID3 encoding convertor, here's the Python version. +""" + +import sys +import locale + +import mutagen +import mutagen.id3 +from mutagen._senf import argv, print_, fsnative +from mutagen._compat import text_type + +from ._util import SignalHandler, OptionParser + + +VERSION = (0, 3) +_sig = SignalHandler() + + +def getpreferredencoding(): + return locale.getpreferredencoding() or "utf-8" + + +def isascii(string): + """Checks whether a unicode string is non-empty and contains only ASCII + characters. 
+ """ + if not string: + return False + + try: + string.encode('ascii') + except UnicodeEncodeError: + return False + + return True + + +class ID3OptionParser(OptionParser): + def __init__(self): + mutagen_version = ".".join(map(str, mutagen.version)) + my_version = ".".join(map(str, VERSION)) + version = "mid3iconv %s\nUses Mutagen %s" % ( + my_version, mutagen_version) + return OptionParser.__init__( + self, version=version, + usage="%prog [OPTION] [FILE]...", + description=("Mutagen-based replacement the id3iconv utility, " + "which converts ID3 tags from legacy encodings " + "to Unicode and stores them using the ID3v2 format.")) + + def format_help(self, *args, **kwargs): + text = OptionParser.format_help(self, *args, **kwargs) + return text + "\nFiles are updated in-place, so use --dry-run first.\n" + + +def update(options, filenames): + encoding = options.encoding or getpreferredencoding() + verbose = options.verbose + noupdate = options.noupdate + force_v1 = options.force_v1 + remove_v1 = options.remove_v1 + + def conv(uni): + return uni.encode('iso-8859-1').decode(encoding) + + for filename in filenames: + with _sig.block(): + if verbose != "quiet": + print_(u"Updating", filename) + + if has_id3v1(filename) and not noupdate and force_v1: + mutagen.id3.delete(filename, False, True) + + try: + id3 = mutagen.id3.ID3(filename) + except mutagen.id3.ID3NoHeaderError: + if verbose != "quiet": + print_(u"No ID3 header found; skipping...") + continue + except Exception as err: + print_(text_type(err), file=sys.stderr) + continue + + for tag in filter(lambda t: t.startswith(("T", "COMM")), id3): + frame = id3[tag] + if isinstance(frame, mutagen.id3.TimeStampTextFrame): + # non-unicode fields + continue + try: + text = frame.text + except AttributeError: + continue + try: + text = [conv(x) for x in frame.text] + except (UnicodeError, LookupError): + continue + else: + frame.text = text + if not text or min(map(isascii, text)): + frame.encoding = 3 + else: + 
frame.encoding = 1 + + if verbose == "debug": + print_(id3.pprint()) + + if not noupdate: + if remove_v1: + id3.save(filename, v1=False) + else: + id3.save(filename) + + +def has_id3v1(filename): + try: + with open(filename, 'rb+') as f: + f.seek(-128, 2) + return f.read(3) == b"TAG" + except IOError: + return False + + +def main(argv): + parser = ID3OptionParser() + parser.add_option( + "-e", "--encoding", metavar="ENCODING", action="store", + type="string", dest="encoding", + help=("Specify original tag encoding (default is %s)" % ( + getpreferredencoding()))) + parser.add_option( + "-p", "--dry-run", action="store_true", dest="noupdate", + help="Do not actually modify files") + parser.add_option( + "--force-v1", action="store_true", dest="force_v1", + help="Use an ID3v1 tag even if an ID3v2 tag is present") + parser.add_option( + "--remove-v1", action="store_true", dest="remove_v1", + help="Remove v1 tag after processing the files") + parser.add_option( + "-q", "--quiet", action="store_const", dest="verbose", + const="quiet", help="Only output errors") + parser.add_option( + "-d", "--debug", action="store_const", dest="verbose", + const="debug", help="Output updated tags") + + for i, arg in enumerate(argv): + if arg == "-v1": + argv[i] = fsnative(u"--force-v1") + elif arg == "-removev1": + argv[i] = fsnative(u"--remove-v1") + + (options, args) = parser.parse_args(argv[1:]) + + if args: + update(options, args) + else: + parser.print_help() + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/lib/mutagen/_tools/mid3v2.py b/lib/mutagen/_tools/mid3v2.py new file mode 100755 index 00000000..8d21c13e --- /dev/null +++ b/lib/mutagen/_tools/mid3v2.py @@ -0,0 +1,465 @@ +# -*- coding: utf-8 -*- +# Copyright 2005 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your 
option) any later version. + +"""Pretend to be /usr/bin/id3v2 from id3lib, sort of.""" + +import sys +import codecs +import mimetypes + +from optparse import SUPPRESS_HELP + +import mutagen +import mutagen.id3 +from mutagen.id3 import Encoding, PictureType +from mutagen._senf import fsnative, print_, argv, fsn2text, fsn2bytes, \ + bytes2fsn +from mutagen._compat import PY2, text_type + +from ._util import split_escape, SignalHandler, OptionParser + + +VERSION = (1, 3) +_sig = SignalHandler() + +global verbose +verbose = True + + +class ID3OptionParser(OptionParser): + def __init__(self): + mutagen_version = ".".join(map(str, mutagen.version)) + my_version = ".".join(map(str, VERSION)) + version = "mid3v2 %s\nUses Mutagen %s" % (my_version, mutagen_version) + self.edits = [] + OptionParser.__init__( + self, version=version, + usage="%prog [OPTION] [FILE]...", + description="Mutagen-based replacement for id3lib's id3v2.") + + def format_help(self, *args, **kwargs): + text = OptionParser.format_help(self, *args, **kwargs) + return text + """\ +You can set the value for any ID3v2 frame by using '--' and then a frame ID. +For example: + mid3v2 --TIT3 "Monkey!" file.mp3 +would set the "Subtitle/Description" frame to "Monkey!". + +Any editing operation will cause the ID3 tag to be upgraded to ID3v2.4. 
+""" + + +def list_frames(option, opt, value, parser): + items = mutagen.id3.Frames.items() + for name, frame in sorted(items): + print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) + raise SystemExit + + +def list_frames_2_2(option, opt, value, parser): + items = list(mutagen.id3.Frames_2_2.items()) + items.sort() + for name, frame in items: + print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) + raise SystemExit + + +def list_genres(option, opt, value, parser): + for i, genre in enumerate(mutagen.id3.TCON.GENRES): + print_(u"%3d: %s" % (i, genre)) + raise SystemExit + + +def delete_tags(filenames, v1, v2): + for filename in filenames: + with _sig.block(): + if verbose: + print_(u"deleting ID3 tag info in", filename, file=sys.stderr) + mutagen.id3.delete(filename, v1, v2) + + +def delete_frames(deletes, filenames): + + try: + deletes = frame_from_fsnative(deletes) + except ValueError as err: + print_(text_type(err), file=sys.stderr) + + frames = deletes.split(",") + + for filename in filenames: + with _sig.block(): + if verbose: + print_(u"deleting %s from" % deletes, filename, + file=sys.stderr) + try: + id3 = mutagen.id3.ID3(filename) + except mutagen.id3.ID3NoHeaderError: + if verbose: + print_(u"No ID3 header found; skipping.", file=sys.stderr) + except Exception as err: + print_(text_type(err), file=sys.stderr) + raise SystemExit(1) + else: + for frame in frames: + id3.delall(frame) + id3.save() + + +def frame_from_fsnative(arg): + """Takes item from argv and returns ascii native str + or raises ValueError. + """ + + assert isinstance(arg, fsnative) + + text = fsn2text(arg, strict=True) + if PY2: + return text.encode("ascii") + else: + return text.encode("ascii").decode("ascii") + + +def value_from_fsnative(arg, escape): + """Takes an item from argv and returns a text_type value without + surrogate escapes or raises ValueError.
+ """ + + assert isinstance(arg, fsnative) + + if escape: + bytes_ = fsn2bytes(arg, "utf-8") + if PY2: + bytes_ = bytes_.decode("string_escape") + else: + bytes_ = codecs.escape_decode(bytes_)[0] + arg = bytes2fsn(bytes_, "utf-8") + + text = fsn2text(arg, strict=True) + return text + + +def error(*args): + print_(*args, file=sys.stderr) + raise SystemExit(1) + + +def get_frame_encoding(frame_id, value): + if frame_id == "APIC": + # See https://github.com/beetbox/beets/issues/899#issuecomment-62437773 + return Encoding.UTF16 if value else Encoding.LATIN1 + else: + return Encoding.UTF8 + + +def write_files(edits, filenames, escape): + # unescape escape sequences and decode values + encoded_edits = [] + for frame, value in edits: + if not value: + continue + + try: + frame = frame_from_fsnative(frame) + except ValueError as err: + print_(text_type(err), file=sys.stderr) + + assert isinstance(frame, str) + + # strip "--" + frame = frame[2:] + + try: + value = value_from_fsnative(value, escape) + except ValueError as err: + error(u"%s: %s" % (frame, text_type(err))) + + assert isinstance(value, text_type) + + encoded_edits.append((frame, value)) + edits = encoded_edits + + # preprocess: + # for all [frame,value] pairs in the edits list + # gather values for identical frames into a list + tmp = {} + for frame, value in edits: + if frame in tmp: + tmp[frame].append(value) + else: + tmp[frame] = [value] + # edits is now a dictionary of frame -> [list of values] + edits = tmp + + # escape also enables escaping of the split separator + if escape: + string_split = split_escape + else: + string_split = lambda s, *args, **kwargs: s.split(*args, **kwargs) + + for filename in filenames: + with _sig.block(): + if verbose: + print_(u"Writing", filename, file=sys.stderr) + try: + id3 = mutagen.id3.ID3(filename) + except mutagen.id3.ID3NoHeaderError: + if verbose: + print_(u"No ID3 header found; creating a new tag", + file=sys.stderr) + id3 = mutagen.id3.ID3() + except Exception as 
err: + print_(str(err), file=sys.stderr) + continue + for (frame, vlist) in edits.items(): + if frame == "POPM": + for value in vlist: + values = string_split(value, ":") + if len(values) == 1: + email, rating, count = values[0], 0, 0 + elif len(values) == 2: + email, rating, count = values[0], values[1], 0 + else: + email, rating, count = values + + frame = mutagen.id3.POPM( + email=email, rating=int(rating), count=int(count)) + id3.add(frame) + elif frame == "APIC": + for value in vlist: + values = string_split(value, ":") + # FIXME: doesn't support filenames with an invalid + # encoding since we have already decoded at that point + fn = values[0] + + if len(values) >= 2: + desc = values[1] + else: + desc = u"cover" + + if len(values) >= 3: + try: + picture_type = int(values[2]) + except ValueError: + error(u"Invalid picture type: %r" % values[2]) + else: + picture_type = PictureType.COVER_FRONT + + if len(values) >= 4: + mime = values[3] + else: + mime = mimetypes.guess_type(fn)[0] or "image/jpeg" + + if len(values) >= 5: + error("APIC: Invalid format") + + encoding = get_frame_encoding(frame, desc) + + try: + with open(fn, "rb") as h: + data = h.read() + except IOError as e: + error(text_type(e)) + + frame = mutagen.id3.APIC(encoding=encoding, mime=mime, + desc=desc, type=picture_type, data=data) + + id3.add(frame) + elif frame == "COMM": + for value in vlist: + values = string_split(value, ":") + if len(values) == 1: + value, desc, lang = values[0], "", "eng" + elif len(values) == 2: + desc, value, lang = values[0], values[1], "eng" + else: + value = ":".join(values[1:-1]) + desc, lang = values[0], values[-1] + frame = mutagen.id3.COMM( + encoding=3, text=value, lang=lang, desc=desc) + id3.add(frame) + elif frame == "UFID": + for value in vlist: + values = string_split(value, ":") + if len(values) != 2: + error(u"Invalid value: %r" % values) + owner = values[0] + data = values[1].encode("utf-8") + frame = mutagen.id3.UFID(owner=owner, data=data) +
id3.add(frame) + elif frame == "TXXX": + for value in vlist: + values = string_split(value, ":", 1) + if len(values) == 1: + desc, value = "", values[0] + else: + desc, value = values[0], values[1] + frame = mutagen.id3.TXXX( + encoding=3, text=value, desc=desc) + id3.add(frame) + elif issubclass(mutagen.id3.Frames[frame], + mutagen.id3.UrlFrame): + frame = mutagen.id3.Frames[frame](encoding=3, url=vlist) + id3.add(frame) + else: + frame = mutagen.id3.Frames[frame](encoding=3, text=vlist) + id3.add(frame) + id3.save(filename) + + +def list_tags(filenames): + for filename in filenames: + print_("IDv2 tag info for", filename) + try: + id3 = mutagen.id3.ID3(filename, translate=False) + except mutagen.id3.ID3NoHeaderError: + print_(u"No ID3 header found; skipping.") + except Exception as err: + print_(text_type(err), file=sys.stderr) + raise SystemExit(1) + else: + print_(id3.pprint()) + + +def list_tags_raw(filenames): + for filename in filenames: + print_("Raw IDv2 tag info for", filename) + try: + id3 = mutagen.id3.ID3(filename, translate=False) + except mutagen.id3.ID3NoHeaderError: + print_(u"No ID3 header found; skipping.") + except Exception as err: + print_(text_type(err), file=sys.stderr) + raise SystemExit(1) + else: + for frame in id3.values(): + print_(text_type(repr(frame))) + + +def main(argv): + parser = ID3OptionParser() + parser.add_option( + "-v", "--verbose", action="store_true", dest="verbose", default=False, + help="be verbose") + parser.add_option( + "-q", "--quiet", action="store_false", dest="verbose", + help="be quiet (the default)") + parser.add_option( + "-e", "--escape", action="store_true", default=False, + help="enable interpretation of backslash escapes") + parser.add_option( + "-f", "--list-frames", action="callback", callback=list_frames, + help="Display all possible frames for ID3v2.3 / ID3v2.4") + parser.add_option( + "--list-frames-v2.2", action="callback", callback=list_frames_2_2, + help="Display all possible frames for ID3v2.2") + 
parser.add_option( + "-L", "--list-genres", action="callback", callback=list_genres, + help="Lists all ID3v1 genres") + parser.add_option( + "-l", "--list", action="store_const", dest="action", const="list", + help="Lists the tag(s) on the open(s)") + parser.add_option( + "--list-raw", action="store_const", dest="action", const="list-raw", + help="Lists the tag(s) on the open(s) in Python format") + parser.add_option( + "-d", "--delete-v2", action="store_const", dest="action", + const="delete-v2", help="Deletes ID3v2 tags") + parser.add_option( + "-s", "--delete-v1", action="store_const", dest="action", + const="delete-v1", help="Deletes ID3v1 tags") + parser.add_option( + "-D", "--delete-all", action="store_const", dest="action", + const="delete-v1-v2", help="Deletes ID3v1 and ID3v2 tags") + parser.add_option( + '--delete-frames', metavar='FID1,FID2,...', action='store', + dest='deletes', default='', help="Delete the given frames") + parser.add_option( + "-C", "--convert", action="store_const", dest="action", + const="convert", + help="Convert tags to ID3v2.4 (any editing will do this)") + + parser.add_option( + "-a", "--artist", metavar='"ARTIST"', action="callback", + help="Set the artist information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TPE1"), + args[2]))) + parser.add_option( + "-A", "--album", metavar='"ALBUM"', action="callback", + help="Set the album title information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TALB"), + args[2]))) + parser.add_option( + "-t", "--song", metavar='"SONG"', action="callback", + help="Set the song title information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TIT2"), + args[2]))) + parser.add_option( + "-c", "--comment", metavar='"DESCRIPTION":"COMMENT":"LANGUAGE"', + action="callback", help="Set the comment information", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--COMM"), + args[2]))) + 
parser.add_option( + "-p", "--picture", + metavar='"FILENAME":"DESCRIPTION":"IMAGE-TYPE":"MIME-TYPE"', + action="callback", help="Set the picture", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--APIC"), + args[2]))) + parser.add_option( + "-g", "--genre", metavar='"GENRE"', action="callback", + help="Set the genre or genre number", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TCON"), + args[2]))) + parser.add_option( + "-y", "--year", "--date", metavar='YYYY[-MM-DD]', action="callback", + help="Set the year/date", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TDRC"), + args[2]))) + parser.add_option( + "-T", "--track", metavar='"num/num"', action="callback", + help="Set the track number/(optional) total tracks", type="string", + callback=lambda *args: args[3].edits.append((fsnative(u"--TRCK"), + args[2]))) + + for key, frame in mutagen.id3.Frames.items(): + if (issubclass(frame, mutagen.id3.TextFrame) + or issubclass(frame, mutagen.id3.UrlFrame) + or issubclass(frame, mutagen.id3.POPM) + or frame in (mutagen.id3.APIC, mutagen.id3.UFID)): + parser.add_option( + "--" + key, action="callback", help=SUPPRESS_HELP, + type='string', metavar="value", # optparse blows up with this + callback=lambda *args: args[3].edits.append(args[1:3])) + + (options, args) = parser.parse_args(argv[1:]) + global verbose + verbose = options.verbose + + if args: + if parser.edits or options.deletes: + if options.deletes: + delete_frames(options.deletes, args) + if parser.edits: + write_files(parser.edits, args, options.escape) + elif options.action in [None, 'list']: + list_tags(args) + elif options.action == "list-raw": + list_tags_raw(args) + elif options.action == "convert": + write_files([], args, options.escape) + elif options.action.startswith("delete"): + delete_tags(args, "v1" in options.action, "v2" in options.action) + else: + parser.print_help() + else: + parser.print_help() + + +def 
entry_point(): + _sig.init() + return main(argv) diff --git a/lib/mutagen/_tools/moggsplit.py b/lib/mutagen/_tools/moggsplit.py new file mode 100755 index 00000000..12cd2522 --- /dev/null +++ b/lib/mutagen/_tools/moggsplit.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""Split a multiplex/chained Ogg file into its component parts.""" + +import os + +import mutagen.ogg +from mutagen._senf import argv + +from ._util import SignalHandler, OptionParser + + +_sig = SignalHandler() + + +def main(argv): + from mutagen.ogg import OggPage + parser = OptionParser( + usage="%prog [options] filename.ogg ...", + description="Split Ogg logical streams using Mutagen.", + version="Mutagen %s" % ".".join(map(str, mutagen.version)) + ) + + parser.add_option( + "--extension", dest="extension", default="ogg", metavar='ext', + help="use this extension (default 'ogg')") + parser.add_option( + "--pattern", dest="pattern", default="%(base)s-%(stream)d.%(ext)s", + metavar='pattern', help="name files using this pattern") + parser.add_option( + "--m3u", dest="m3u", action="store_true", default=False, + help="generate an m3u (playlist) file") + + (options, args) = parser.parse_args(argv[1:]) + if not args: + raise SystemExit(parser.print_help() or 1) + + format = {'ext': options.extension} + for filename in args: + with _sig.block(): + fileobjs = {} + format["base"] = os.path.splitext(os.path.basename(filename))[0] + fileobj = open(filename, "rb") + if options.m3u: + m3u = open(format["base"] + ".m3u", "w") + fileobjs["m3u"] = m3u + else: + m3u = None + while True: + try: + page = OggPage(fileobj) + except EOFError: + break + else: + format["stream"] = page.serial + if page.serial not in fileobjs: + 
new_filename = options.pattern % format + new_fileobj = open(new_filename, "wb") + fileobjs[page.serial] = new_fileobj + if m3u: + m3u.write(new_filename + "\r\n") + fileobjs[page.serial].write(page.write()) + for f in fileobjs.values(): + f.close() + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/lib/mutagen/_tools/mutagen_inspect.py b/lib/mutagen/_tools/mutagen_inspect.py new file mode 100755 index 00000000..6bd6c614 --- /dev/null +++ b/lib/mutagen/_tools/mutagen_inspect.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# Copyright 2005 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""Full tag list for any given file.""" + +from mutagen._senf import print_, argv +from mutagen._compat import text_type + +from ._util import SignalHandler, OptionParser + + +_sig = SignalHandler() + + +def main(argv): + from mutagen import File + + parser = OptionParser() + parser.add_option("--no-flac", help="Compatibility; does nothing.") + parser.add_option("--no-mp3", help="Compatibility; does nothing.") + parser.add_option("--no-apev2", help="Compatibility; does nothing.") + + (options, args) = parser.parse_args(argv[1:]) + if not args: + raise SystemExit(parser.print_help() or 1) + + for filename in args: + print_(u"--", filename) + try: + print_(u"-", File(filename).pprint()) + except AttributeError: + print_(u"- Unknown file type") + except Exception as err: + print_(text_type(err)) + print_(u"") + + +def entry_point(): + _sig.init() + return main(argv) diff --git a/lib/mutagen/_tools/mutagen_pony.py b/lib/mutagen/_tools/mutagen_pony.py new file mode 100755 index 00000000..e4a496c7 --- /dev/null +++ b/lib/mutagen/_tools/mutagen_pony.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# Copyright 2005 Joe Wreschnig, Michael Urman +# +# This 
program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import os +import sys +import traceback + +from mutagen._senf import print_, argv + +from ._util import SignalHandler + + +class Report(object): + def __init__(self, pathname): + self.name = pathname + self.files = 0 + self.unsync = 0 + self.missings = 0 + self.errors = [] + self.exceptions = {} + self.versions = {} + + def missing(self, filename): + self.missings += 1 + self.files += 1 + + def error(self, filename): + Ex, value, trace = sys.exc_info() + self.exceptions.setdefault(Ex, 0) + self.exceptions[Ex] += 1 + self.errors.append((filename, Ex, value, trace)) + self.files += 1 + + def success(self, id3): + self.versions.setdefault(id3.version, 0) + self.versions[id3.version] += 1 + self.files += 1 + if id3.f_unsynch: + self.unsync += 1 + + def __str__(self): + strings = ["-- Report for %s --" % self.name] + if self.files == 0: + return strings[0] + "\n" + "No MP3 files found.\n" + + good = self.files - len(self.errors) + strings.append("Loaded %d/%d files (%d%%)" % ( + good, self.files, (float(good) / self.files) * 100)) + strings.append("%d files with unsynchronized frames." % self.unsync) + strings.append("%d files without tags." 
% self.missings) + + strings.append("\nID3 Versions:") + items = list(self.versions.items()) + items.sort() + for v, i in items: + strings.append(" %s\t%d" % (".".join(map(str, v)), i)) + + if self.exceptions: + strings.append("\nExceptions:") + items = list(self.exceptions.items()) + items.sort() + for Ex, i in items: + strings.append(" %-20s\t%d" % (Ex.__name__, i)) + + if self.errors: + strings.append("\nERRORS:\n") + for filename, Ex, value, trace in self.errors: + strings.append("\nReading %s:" % filename) + strings.append( + "".join(traceback.format_exception(Ex, value, trace)[1:])) + else: + strings.append("\nNo errors!") + + return("\n".join(strings)) + + +def check_dir(path): + from mutagen.mp3 import MP3 + + rep = Report(path) + print_(u"Scanning", path) + for path, dirs, files in os.walk(path): + files.sort() + for fn in files: + if not fn.lower().endswith('.mp3'): + continue + ffn = os.path.join(path, fn) + try: + mp3 = MP3(ffn) + except Exception: + rep.error(ffn) + else: + if mp3.tags is None: + rep.missing(ffn) + else: + rep.success(mp3.tags) + + print_(str(rep)) + + +def main(argv): + if len(argv) == 1: + print_(u"Usage:", argv[0], u"directory ...") + else: + for path in argv[1:]: + check_dir(path) + + +def entry_point(): + SignalHandler().init() + return main(argv) diff --git a/lib/mutagen/_util.py b/lib/mutagen/_util.py old mode 100644 new mode 100755 index 570744b1..a178eaa4 --- a/lib/mutagen/_util.py +++ b/lib/mutagen/_util.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Utility classes for Mutagen. 
@@ -12,13 +12,224 @@ You should not rely on the interfaces here being stable. They are intended for internal use in Mutagen only. """ +import sys import struct import codecs +import errno +try: + import mmap +except ImportError: + # Google App Engine has no mmap: + # https://github.com/quodlibet/mutagen/issues/286 + mmap = None + +from collections import namedtuple +from contextlib import contextmanager +from functools import wraps from fnmatch import fnmatchcase -from ._compat import chr_, text_type, PY2, iteritems, iterbytes, \ - integer_types, xrange +from ._compat import chr_, PY2, iteritems, iterbytes, integer_types, xrange, \ + izip, text_type, reraise + + +def is_fileobj(fileobj): + """Returns: + bool: if an argument passed ot mutagen should be treated as a + file object + """ + + # open() only handles str/bytes, so we can be strict + return not isinstance(fileobj, (text_type, bytes)) + + +def verify_fileobj(fileobj, writable=False): + """Verifies that the passed fileobj is a file like object which + we can use. + + Args: + writable (bool): verify that the file object is writable as well + + Raises: + ValueError: In case the object is not a file object that is readable + (or writable if required) or is not opened in bytes mode. + """ + + try: + data = fileobj.read(0) + except Exception: + if not hasattr(fileobj, "read"): + raise ValueError("%r not a valid file object" % fileobj) + raise ValueError("Can't read from file object %r" % fileobj) + + if not isinstance(data, bytes): + raise ValueError( + "file object %r not opened in binary mode" % fileobj) + + if writable: + try: + fileobj.write(b"") + except Exception: + if not hasattr(fileobj, "write"): + raise ValueError("%r not a valid file object" % fileobj) + raise ValueError("Can't write to file object %r" % fileobj) + + +def verify_filename(filename): + """Checks of the passed in filename has the correct type. 
+ + Raises: + ValueError: if not a filename + """ + + if is_fileobj(filename): + raise ValueError("%r not a filename" % filename) + + +def fileobj_name(fileobj): + """ + Returns: + text: A potential filename for a file object. Always a valid + path type, but might be empty or non-existent. + """ + + value = getattr(fileobj, "name", u"") + if not isinstance(value, (text_type, bytes)): + value = text_type(value) + return value + + +def loadfile(method=True, writable=False, create=False): + """A decorator for functions taking a `filething` as a first argument. + + Passes a FileThing instance as the first argument to the wrapped function. + + Args: + method (bool): If the wrapped functions is a method + writable (bool): If a filename is passed opens the file readwrite, if + passed a file object verifies that it is writable. + create (bool): If passed a filename that does not exist will create + a new empty file. + """ + + def convert_file_args(args, kwargs): + filething = args[0] if args else None + filename = kwargs.pop("filename", None) + fileobj = kwargs.pop("fileobj", None) + return filething, filename, fileobj, args[1:], kwargs + + def wrap(func): + + @wraps(func) + def wrapper(self, *args, **kwargs): + filething, filename, fileobj, args, kwargs = \ + convert_file_args(args, kwargs) + with _openfile(self, filething, filename, fileobj, + writable, create) as h: + return func(self, h, *args, **kwargs) + + @wraps(func) + def wrapper_func(*args, **kwargs): + filething, filename, fileobj, args, kwargs = \ + convert_file_args(args, kwargs) + with _openfile(None, filething, filename, fileobj, + writable, create) as h: + return func(h, *args, **kwargs) + + return wrapper if method else wrapper_func + + return wrap + + +def convert_error(exc_src, exc_dest): + """A decorator for reraising exceptions with a different type. + Mostly useful for IOError. + + Args: + exc_src (type): The source exception type + exc_dest (type): The target exception type. 
+ """ + + def wrap(func): + + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exc_dest: + raise + except exc_src as err: + reraise(exc_dest, err, sys.exc_info()[2]) + + return wrapper + + return wrap + + +FileThing = namedtuple("FileThing", ["fileobj", "filename", "name"]) +"""filename is None if the source is not a filename. name is a filename which +can be used for file type detection +""" + + +@contextmanager +def _openfile(instance, filething, filename, fileobj, writable, create): + """yields a FileThing + + Args: + filething: Either a file name, a file object or None + filename: Either a file name or None + fileobj: Either a file object or None + writable (bool): if the file should be opened + create (bool): if the file should be created if it doesn't exist. + implies writable + Raises: + MutagenError: In case opening the file failed + TypeError: in case neither a file name or a file object is passed + """ + + assert not create or writable + + # to allow stacked context managers, just pass the result through + if isinstance(filething, FileThing): + filename = filething.filename + fileobj = filething.fileobj + filething = None + + if filething is not None: + if is_fileobj(filething): + fileobj = filething + else: + filename = filething + + if instance is not None: + # XXX: take "not writable" as loading the file.. 
+ if not writable: + instance.filename = filename + elif filename is None: + filename = getattr(instance, "filename", None) + + if fileobj is not None: + verify_fileobj(fileobj, writable=writable) + yield FileThing(fileobj, filename, filename or fileobj_name(fileobj)) + elif filename is not None: + verify_filename(filename) + try: + fileobj = open(filename, "rb+" if writable else "rb") + except IOError as e: + if create and e.errno == errno.ENOENT: + assert writable + try: + fileobj = open(filename, "wb+") + except IOError as e2: + raise MutagenError(e2) + else: + raise MutagenError(e) + + with fileobj as fileobj: + yield FileThing(fileobj, filename, filename) + else: + raise TypeError("Missing filename or fileobj argument") class MutagenError(Exception): @@ -27,8 +238,15 @@ class MutagenError(Exception): .. versionadded:: 1.25 """ + __module__ = "mutagen" + def total_ordering(cls): + """Adds all possible ordering methods to a class. + + Needs a working __eq__ and __lt__ and will supply the rest. + """ + assert "__eq__" in cls.__dict__ assert "__lt__" in cls.__dict__ @@ -58,6 +276,25 @@ def hashable(cls): def enum(cls): + """A decorator for creating an int enum class. + + Makes the values a subclass of the type and implements repr/str. + The new class will be a subclass of int. 
+ + Args: + cls (type): The class to convert to an enum + + Returns: + type: A new class + + :: + + @enum + class Foo(object): + FOO = 1 + BAR = 2 + """ + assert cls.__bases__ == (object,) d = dict(cls.__dict__) @@ -71,13 +308,72 @@ def enum(cls): setattr(new_type, key, value_instance) map_[value] = key - def repr_(self): + def str_(self): if self in map_: return "%s.%s" % (type(self).__name__, map_[self]) - else: - return "%s(%s)" % (type(self).__name__, self) + return "%d" % int(self) + + def repr_(self): + if self in map_: + return "<%s.%s: %d>" % (type(self).__name__, map_[self], int(self)) + return "%d" % int(self) setattr(new_type, "__repr__", repr_) + setattr(new_type, "__str__", str_) + + return new_type + + +def flags(cls): + """A decorator for creating an int flags class. + + Makes the values a subclass of the type and implements repr/str. + The new class will be a subclass of int. + + Args: + cls (type): The class to convert to an flags + + Returns: + type: A new class + + :: + + @flags + class Foo(object): + FOO = 1 + BAR = 2 + """ + + assert cls.__bases__ == (object,) + + d = dict(cls.__dict__) + new_type = type(cls.__name__, (int,), d) + new_type.__module__ = cls.__module__ + + map_ = {} + for key, value in iteritems(d): + if key.upper() == key and isinstance(value, integer_types): + value_instance = new_type(value) + setattr(new_type, key, value_instance) + map_[value] = key + + def str_(self): + value = int(self) + matches = [] + for k, v in map_.items(): + if value & k: + matches.append("%s.%s" % (type(self).__name__, v)) + value &= ~k + if value != 0 or not matches: + matches.append(text_type(value)) + + return " | ".join(matches) + + def repr_(self): + return "<%s: %d>" % (str(self), int(self)) + + setattr(new_type, "__repr__", repr_) + setattr(new_type, "__str__", str_) return new_type @@ -124,7 +420,7 @@ class DictMixin(object): itervalues = lambda self: iter(self.values()) def items(self): - return list(zip(self.keys(), self.values())) + 
return list(izip(self.keys(), self.values())) if PY2: iteritems = lambda s: iter(s.items()) @@ -237,6 +533,19 @@ def _fill_cdata(cls): if s.size == 1: esuffix = "" bits = str(s.size * 8) + + if unsigned: + max_ = 2 ** (s.size * 8) - 1 + min_ = 0 + else: + max_ = 2 ** (s.size * 8 - 1) - 1 + min_ = - 2 ** (s.size * 8 - 1) + + funcs["%s%s_min" % (prefix, name)] = min_ + funcs["%s%s_max" % (prefix, name)] = max_ + funcs["%sint%s_min" % (prefix, bits)] = min_ + funcs["%sint%s_max" % (prefix, bits)] = max_ + funcs["%s%s%s" % (prefix, name, esuffix)] = unpack funcs["%sint%s%s" % (prefix, bits, esuffix)] = unpack funcs["%s%s%s_from" % (prefix, name, esuffix)] = unpack_from @@ -259,8 +568,8 @@ class cdata(object): error = error bitswap = b''.join( - chr_(sum(((val >> i) & 1) << (7 - i) for i in range(8))) - for val in range(256)) + chr_(sum(((val >> i) & 1) << (7 - i) for i in xrange(8))) + for val in xrange(256)) test_bit = staticmethod(lambda value, n: bool((value >> n) & 1)) @@ -268,45 +577,210 @@ class cdata(object): _fill_cdata(cdata) -def lock(fileobj): - """Lock a file object 'safely'. +def get_size(fileobj): + """Returns the size of the file. + The position when passed in will be preserved if no error occurs. - That means a failure to lock because the platform doesn't - support fcntl or filesystem locks is not considered a - failure. This call does block. - - Returns whether or not the lock was successful, or - raises an exception in more extreme circumstances (full - lock table, invalid file). + Args: + fileobj (fileobj) + Returns: + int: The size of the file + Raises: + IOError """ + old_pos = fileobj.tell() + try: + fileobj.seek(0, 2) + return fileobj.tell() + finally: + fileobj.seek(old_pos, 0) + + +def read_full(fileobj, size): + """Like fileobj.read but raises IOError if no all requested data is + returned. + + If you want to distinguish IOError and the EOS case, better handle + the error yourself instead of using this. 
+ + Args: + fileobj (fileobj) + size (int): amount of bytes to read + Raises: + IOError: In case read fails or not enough data is read + """ + + if size < 0: + raise ValueError("size must not be negative") + + data = fileobj.read(size) + if len(data) != size: + raise IOError + return data + + +def seek_end(fileobj, offset): + """Like fileobj.seek(-offset, 2), but will not try to go beyond the start + + Needed since file objects from BytesIO will not raise IOError and + file objects from open() will raise IOError if going to a negative offset. + To make things easier for custom implementations, instead of allowing + both behaviors, we just don't do it. + + Args: + fileobj (fileobj) + offset (int): how many bytes away from the end backwards to seek to + + Raises: + IOError + """ + + if offset < 0: + raise ValueError + + if get_size(fileobj) < offset: + fileobj.seek(0, 0) + else: + fileobj.seek(-offset, 2) + + +def mmap_move(fileobj, dest, src, count): + """Mmaps the file object if possible and moves 'count' data + from 'src' to 'dest'. All data has to be inside the file size + (enlarging the file through this function isn't possible) + + Will adjust the file offset. + + Args: + fileobj (fileobj) + dest (int): The destination offset + src (int): The source offset + count (int) The amount of data to move + Raises: + mmap.error: In case move failed + IOError: In case an operation on the fileobj fails + ValueError: In case invalid parameters were given + """ + + assert mmap is not None, "no mmap support" + + if dest < 0 or src < 0 or count < 0: + raise ValueError("Invalid parameters") + try: - import fcntl - except ImportError: - return False - else: - try: - fcntl.lockf(fileobj, fcntl.LOCK_EX) - except IOError: - # FIXME: There's possibly a lot of complicated - # logic that needs to go here in case the IOError - # is EACCES or EAGAIN. 
- return False - else: - return True + fileno = fileobj.fileno() + except (AttributeError, IOError): + raise mmap.error( + "File object does not expose/support a file descriptor") + + fileobj.seek(0, 2) + filesize = fileobj.tell() + length = max(dest, src) + count + + if length > filesize: + raise ValueError("Not in file size boundary") + + offset = ((min(dest, src) // mmap.ALLOCATIONGRANULARITY) * + mmap.ALLOCATIONGRANULARITY) + assert dest >= offset + assert src >= offset + assert offset % mmap.ALLOCATIONGRANULARITY == 0 + + # Windows doesn't handle empty mappings, add a fast path here instead + if count == 0: + return + + # fast path + if src == dest: + return + + fileobj.flush() + file_map = mmap.mmap(fileno, length - offset, offset=offset) + try: + file_map.move(dest - offset, src - offset, count) + finally: + file_map.close() -def unlock(fileobj): - """Unlock a file object. +def resize_file(fobj, diff, BUFFER_SIZE=2 ** 16): + """Resize a file by `diff`. - Don't call this on a file object unless a call to lock() - returned true. + New space will be filled with zeros. + + Args: + fobj (fileobj) + diff (int): amount of size to change + Raises: + IOError """ - # If this fails there's a mismatched lock/unlock pair, - # so we definitely don't want to ignore errors. - import fcntl - fcntl.lockf(fileobj, fcntl.LOCK_UN) + fobj.seek(0, 2) + filesize = fobj.tell() + + if diff < 0: + if filesize + diff < 0: + raise ValueError + # truncate flushes internally + fobj.truncate(filesize + diff) + elif diff > 0: + try: + while diff: + addsize = min(BUFFER_SIZE, diff) + fobj.write(b"\x00" * addsize) + diff -= addsize + fobj.flush() + except IOError as e: + if e.errno == errno.ENOSPC: + # To reduce the chance of corrupt files in case of missing + # space try to revert the file expansion back. Of course + # in reality every in-file-write can also fail due to COW etc. 
+ # Note: IOError gets also raised in flush() due to buffering + fobj.truncate(filesize) + raise + + +def fallback_move(fobj, dest, src, count, BUFFER_SIZE=2 ** 16): + """Moves data around using read()/write(). + + Args: + fileobj (fileobj) + dest (int): The destination offset + src (int): The source offset + count (int) The amount of data to move + Raises: + IOError: In case an operation on the fileobj fails + ValueError: In case invalid parameters were given + """ + + if dest < 0 or src < 0 or count < 0: + raise ValueError + + fobj.seek(0, 2) + filesize = fobj.tell() + + if max(dest, src) + count > filesize: + raise ValueError("area outside of file") + + if src > dest: + moved = 0 + while count - moved: + this_move = min(BUFFER_SIZE, count - moved) + fobj.seek(src + moved) + buf = fobj.read(this_move) + fobj.seek(dest + moved) + fobj.write(buf) + moved += this_move + fobj.flush() + else: + while count: + this_move = min(BUFFER_SIZE, count) + fobj.seek(src + count - this_move) + buf = fobj.read(this_move) + fobj.seek(count + dest - this_move) + fobj.write(buf) + count -= this_move + fobj.flush() def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): @@ -315,60 +789,34 @@ def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): fobj must be an open file object, open rb+ or equivalent. Mutagen tries to use mmap to resize the file, but falls back to a significantly slower method if mmap fails. 
+ + Args: + fobj (fileobj) + size (int): The amount of space to insert + offset (int): The offset at which to insert the space + Raises: + IOError """ - assert 0 < size - assert 0 <= offset - locked = False + if size < 0 or offset < 0: + raise ValueError + fobj.seek(0, 2) filesize = fobj.tell() movesize = filesize - offset - fobj.write(b'\x00' * size) - fobj.flush() - try: + + if movesize < 0: + raise ValueError + + resize_file(fobj, size, BUFFER_SIZE) + + if mmap is not None: try: - import mmap - file_map = mmap.mmap(fobj.fileno(), filesize + size) - try: - file_map.move(offset + size, offset, movesize) - finally: - file_map.close() - except (ValueError, EnvironmentError, ImportError): - # handle broken mmap scenarios - locked = lock(fobj) - fobj.truncate(filesize) - - fobj.seek(0, 2) - padsize = size - # Don't generate an enormous string if we need to pad - # the file out several megs. - while padsize: - addsize = min(BUFFER_SIZE, padsize) - fobj.write(b"\x00" * addsize) - padsize -= addsize - - fobj.seek(filesize, 0) - while movesize: - # At the start of this loop, fobj is pointing at the end - # of the data we need to move, which is of movesize length. - thismove = min(BUFFER_SIZE, movesize) - # Seek back however much we're going to read this frame. - fobj.seek(-thismove, 1) - nextpos = fobj.tell() - # Read it, so we're back at the end. - data = fobj.read(thismove) - # Seek back to where we need to write it. - fobj.seek(-thismove + size, 1) - # Write it. - fobj.write(data) - # And seek back to the end of the unmoved data. 
- fobj.seek(nextpos) - movesize -= thismove - - fobj.flush() - finally: - if locked: - unlock(fobj) + mmap_move(fobj, offset + size, offset, movesize) + except mmap.error: + fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE) + else: + fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE) def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): @@ -377,46 +825,71 @@ def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): fobj must be an open file object, open rb+ or equivalent. Mutagen tries to use mmap to resize the file, but falls back to a significantly slower method if mmap fails. + + Args: + fobj (fileobj) + size (int): The amount of space to delete + offset (int): The start of the space to delete + Raises: + IOError """ - locked = False - assert 0 < size - assert 0 <= offset + if size < 0 or offset < 0: + raise ValueError + fobj.seek(0, 2) filesize = fobj.tell() movesize = filesize - offset - size - assert 0 <= movesize - try: - if movesize > 0: - fobj.flush() - try: - import mmap - file_map = mmap.mmap(fobj.fileno(), filesize) - try: - file_map.move(offset, offset + size, movesize) - finally: - file_map.close() - except (ValueError, EnvironmentError, ImportError): - # handle broken mmap scenarios - locked = lock(fobj) - fobj.seek(offset + size) - buf = fobj.read(BUFFER_SIZE) - while buf: - fobj.seek(offset) - fobj.write(buf) - offset += len(buf) - fobj.seek(offset + size) - buf = fobj.read(BUFFER_SIZE) - fobj.truncate(filesize - size) - fobj.flush() - finally: - if locked: - unlock(fobj) + + if movesize < 0: + raise ValueError + + if mmap is not None: + try: + mmap_move(fobj, offset, offset + size, movesize) + except mmap.error: + fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE) + else: + fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE) + + resize_file(fobj, -size, BUFFER_SIZE) + + +def resize_bytes(fobj, old_size, new_size, offset): + """Resize an area in a file adding and deleting at the end 
of it. + Does nothing if no resizing is needed. + + Args: + fobj (fileobj) + old_size (int): The area starting at offset + new_size (int): The new size of the area + offset (int): The start of the area + Raises: + IOError + """ + + if new_size < old_size: + delete_size = old_size - new_size + delete_at = offset + new_size + delete_bytes(fobj, delete_size, delete_at) + elif new_size > old_size: + insert_size = new_size - old_size + insert_at = offset + old_size + insert_bytes(fobj, insert_size, insert_at) def dict_match(d, key, default=None): """Like __getitem__ but works as if the keys() are all filename patterns. Returns the value of any dict key that matches the passed key. + + Args: + d (dict): A dict with filename patterns as keys + key (str): A key potentially matching any of the keys + default (object): The object to return if no pattern matched the + passed in key + Returns: + object: The dict value where the dict key matched the passed in key. + Or default if there was no match. """ if key in d and "[" not in key: @@ -428,15 +901,57 @@ def dict_match(d, key, default=None): return default +def encode_endian(text, encoding, errors="strict", le=True): + """Like text.encode(encoding) but always returns little endian/big endian + BOMs instead of the system one. 
+ + Args: + text (text) + encoding (str) + errors (str) + le (boolean): if little endian + Returns: + bytes + Raises: + UnicodeEncodeError + LookupError + """ + + encoding = codecs.lookup(encoding).name + + if encoding == "utf-16": + if le: + return codecs.BOM_UTF16_LE + text.encode("utf-16-le", errors) + else: + return codecs.BOM_UTF16_BE + text.encode("utf-16-be", errors) + elif encoding == "utf-32": + if le: + return codecs.BOM_UTF32_LE + text.encode("utf-32-le", errors) + else: + return codecs.BOM_UTF32_BE + text.encode("utf-32-be", errors) + else: + return text.encode(encoding, errors) + + def decode_terminated(data, encoding, strict=True): """Returns the decoded data until the first NULL terminator and all data after it. - In case the data can't be decoded raises UnicodeError. - In case the encoding is not found raises LookupError. - In case the data isn't null terminated (even if it is encoded correctly) - raises ValueError except if strict is False, then the decoded string - will be returned anyway. + Args: + data (bytes): data to decode + encoding (str): The codec to use + strict (bool): If True will raise ValueError in case no NULL is found + but the available data decoded successfully. + Returns: + Tuple[`text`, `bytes`]: A tuple containing the decoded text and the + remaining data after the found NULL termination. + + Raises: + UnicodeError: In case the data can't be decoded. + LookupError:In case the encoding is not found. + ValueError: In case the data isn't null terminated (even if it is + encoded correctly) except if strict is False, then the decoded + string will be returned anyway. """ codec_info = codecs.lookup(encoding) @@ -472,47 +987,6 @@ def decode_terminated(data, encoding, strict=True): return u"".join(r), b"" -def split_escape(string, sep, maxsplit=None, escape_char="\\"): - """Like unicode/str/bytes.split but allows for the separator to be escaped - - If passed unicode/str/bytes will only return list of unicode/str/bytes. 
- """ - - assert len(sep) == 1 - assert len(escape_char) == 1 - - if isinstance(string, bytes): - if isinstance(escape_char, text_type): - escape_char = escape_char.encode("ascii") - iter_ = iterbytes - else: - iter_ = iter - - if maxsplit is None: - maxsplit = len(string) - - empty = string[:0] - result = [] - current = empty - escaped = False - for char in iter_(string): - if escaped: - if char != escape_char and char != sep: - current += escape_char - current += char - escaped = False - else: - if char == escape_char: - escaped = True - elif char == sep and len(result) < maxsplit: - result.append(current) - current = empty - else: - current += char - result.append(current) - return result - - class BitReaderError(Exception): pass diff --git a/lib/mutagen/_vorbis.py b/lib/mutagen/_vorbis.py old mode 100644 new mode 100755 index b5cd9254..5831453c --- a/lib/mutagen/_vorbis.py +++ b/lib/mutagen/_vorbis.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005-2006 Joe Wreschnig # 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Vorbis comment data. @@ -20,7 +20,7 @@ import sys import mutagen from ._compat import reraise, BytesIO, text_type, xrange, PY3, PY2 -from mutagen._util import DictMixin, cdata +from mutagen._util import DictMixin, cdata, MutagenError def is_valid_key(key): @@ -45,7 +45,7 @@ def is_valid_key(key): istag = is_valid_key -class error(IOError): +class error(MutagenError): pass @@ -57,7 +57,7 @@ class VorbisEncodingError(error): pass -class VComment(mutagen.Metadata, list): +class VComment(mutagen.Tags, list): """A Vorbis comment parser, accessor, and renderer. 
All comment ordering is preserved. A VComment is a list of @@ -68,13 +68,13 @@ class VComment(mutagen.Metadata, list): file-like object, not a filename. Attributes: - - * vendor -- the stream 'vendor' (i.e. writer); default 'Mutagen' + vendor (text): the stream 'vendor' (i.e. writer); default 'Mutagen' """ vendor = u"Mutagen " + mutagen.version_string def __init__(self, data=None, *args, **kwargs): + self._size = 0 # Collect the args to pass to load, this lets child classes # override just load and get equivalent magic for the # constructor. @@ -83,17 +83,18 @@ class VComment(mutagen.Metadata, list): data = BytesIO(data) elif not hasattr(data, 'read'): raise TypeError("VComment requires bytes or a file-like") + start = data.tell() self.load(data, *args, **kwargs) + self._size = data.tell() - start def load(self, fileobj, errors='replace', framing=True): """Parse a Vorbis comment from a file-like object. - Keyword arguments: - - * errors: - 'strict', 'replace', or 'ignore'. This affects Unicode decoding - and how other malformed content is interpreted. - * framing -- if true, fail if a framing bit is not present + Arguments: + errors (str): 'strict', 'replace', or 'ignore'. + This affects Unicode decoding and how other malformed content + is interpreted. + framing (bool): if true, fail if a framing bit is not present Framing bits are required by the Vorbis comment specification, but are not used in FLAC Vorbis comment blocks. @@ -183,9 +184,8 @@ class VComment(mutagen.Metadata, list): Validation is always performed, so calling this function on invalid data may raise a ValueError. 
- Keyword arguments: - - * framing -- if true, append a framing bit (see load) + Arguments: + framing (bool): if true, append a framing bit (see load) """ self.validate() diff --git a/lib/mutagen/aac.py b/lib/mutagen/aac.py old mode 100644 new mode 100755 index 62488711..fa6f7064 --- a/lib/mutagen/aac.py +++ b/lib/mutagen/aac.py @@ -2,8 +2,9 @@ # Copyright (C) 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """ * ADTS - Audio Data Transport Stream @@ -13,7 +14,9 @@ from mutagen import StreamInfo from mutagen._file import FileType -from mutagen._util import BitReader, BitReaderError, MutagenError +from mutagen._util import BitReader, BitReaderError, MutagenError, loadfile, \ + convert_error +from mutagen.id3._util import BitPaddedInt from mutagen._compat import endswith, xrange @@ -262,16 +265,16 @@ class AACError(MutagenError): class AACInfo(StreamInfo): - """AAC stream information. + """AACInfo() + + AAC stream information. + The length of the stream is just a guess and might not be correct. Attributes: - - * channels -- number of audio channels - * length -- file length in seconds, as a float - * sample_rate -- audio sampling rate in Hz - * bitrate -- audio bitrate, in bits per second - - The length of the stream is just a guess and might not be correct. 
+ channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bitrate (`int`): audio bitrate, in bits per second """ channels = 0 @@ -279,11 +282,13 @@ class AACInfo(StreamInfo): sample_rate = 0 bitrate = 0 + @convert_error(IOError, AACError) def __init__(self, fileobj): + """Raises AACError""" + # skip id3v2 header start_offset = 0 header = fileobj.read(10) - from mutagen.id3 import BitPaddedInt if header.startswith(b"ID3"): size = BitPaddedInt(header[6:]) start_offset = size + 10 @@ -373,24 +378,34 @@ class AACInfo(StreamInfo): self.length = float(s.samples * stream_size) / (s.size * s.frequency) def pprint(self): - return "AAC (%s), %d Hz, %.2f seconds, %d channel(s), %d bps" % ( + return u"AAC (%s), %d Hz, %.2f seconds, %d channel(s), %d bps" % ( self._type, self.sample_rate, self.length, self.channels, self.bitrate) class AAC(FileType): - """Load ADTS or ADIF streams containing AAC. + """AAC(filething) + + Arguments: + filething (filething) + + Load ADTS or ADIF streams containing AAC. Tagging is not supported. Use the ID3/APEv2 classes directly instead. + + Attributes: + info (`AACInfo`) """ _mimes = ["audio/x-aac"] - def load(self, filename): - self.filename = filename - with open(filename, "rb") as h: - self.info = AACInfo(h) + @loadfile() + def load(self, filething): + self.info = AACInfo(filething.fileobj) + + def add_tags(self): + raise AACError("doesn't support tags") @staticmethod def score(filename, fileobj, header): diff --git a/lib/mutagen/aiff.py b/lib/mutagen/aiff.py old mode 100644 new mode 100755 index 1618caa3..74cf5f88 --- a/lib/mutagen/aiff.py +++ b/lib/mutagen/aiff.py @@ -1,36 +1,34 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2014 Evan Purkhiser # 2014 Ben Ockmore # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. 
+# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """AIFF audio stream information and tags.""" -# NOTE from Ben Ockmore - according to the Py3k migration guidelines, AIFF -# chunk keys should be unicode in Py3k, and unicode or bytes in Py2k (ASCII). -# To make this easier, chunk keys should be stored internally as unicode. - +import sys import struct from struct import pack -from ._compat import endswith, text_type, PY3 +from ._compat import endswith, text_type, reraise from mutagen import StreamInfo, FileType from mutagen.id3 import ID3 -from mutagen.id3._util import error as ID3Error -from mutagen._util import insert_bytes, delete_bytes, MutagenError +from mutagen.id3._util import ID3NoHeaderError, error as ID3Error +from mutagen._util import resize_bytes, delete_bytes, MutagenError, loadfile, \ + convert_error __all__ = ["AIFF", "Open", "delete"] -class error(MutagenError, RuntimeError): +class error(MutagenError): pass -class InvalidChunk(error, IOError): +class InvalidChunk(error): pass @@ -39,14 +37,7 @@ _HUGE_VAL = 1.79769313486231e+308 def is_valid_chunk_id(id): - if not isinstance(id, text_type): - if PY3: - raise TypeError("AIFF chunk must be unicode") - - try: - id = id.decode('ascii') - except UnicodeDecodeError: - return False + assert isinstance(id, text_type) return ((len(id) <= 4) and (min(id) >= u' ') and (max(id) <= u'~')) @@ -85,37 +76,59 @@ class IFFChunk(object): self.id, self.data_size = struct.unpack('>4si', header) - if not isinstance(self.id, text_type): + try: self.id = self.id.decode('ascii') + except UnicodeDecodeError: + raise InvalidChunk() if not is_valid_chunk_id(self.id): raise InvalidChunk() self.size = self.HEADER_SIZE + self.data_size self.data_offset = fileobj.tell() - self.data = None def read(self): """Read the chunks data""" + self.__fileobj.seek(self.data_offset) - self.data = 
self.__fileobj.read(self.data_size) + return self.__fileobj.read(self.data_size) + + def write(self, data): + """Write the chunk data""" + + if len(data) > self.data_size: + raise ValueError + + self.__fileobj.seek(self.data_offset) + self.__fileobj.write(data) def delete(self): """Removes the chunk from the file""" + delete_bytes(self.__fileobj, self.size, self.offset) if self.parent_chunk is not None: - self.parent_chunk.resize(self.parent_chunk.data_size - self.size) + self.parent_chunk._update_size( + self.parent_chunk.data_size - self.size) - def resize(self, data_size): + def _update_size(self, data_size): """Update the size of the chunk""" + self.__fileobj.seek(self.offset + 4) self.__fileobj.write(pack('>I', data_size)) if self.parent_chunk is not None: size_diff = self.data_size - data_size - self.parent_chunk.resize(self.parent_chunk.data_size - size_diff) + self.parent_chunk._update_size( + self.parent_chunk.data_size - size_diff) self.data_size = data_size self.size = data_size + self.HEADER_SIZE + def resize(self, new_data_size): + """Resize the file and update the chunk sizes""" + + resize_bytes( + self.__fileobj, self.data_size, new_data_size, self.data_offset) + self._update_size(new_data_size) + class IFFFile(object): """Representation of a IFF file""" @@ -154,8 +167,7 @@ class IFFFile(object): def __contains__(self, id_): """Check if the IFF file contains a specific chunk""" - if not isinstance(id_, text_type): - id_ = id_.decode('ascii') + assert isinstance(id_, text_type) if not is_valid_chunk_id(id_): raise KeyError("AIFF key must be four ASCII characters.") @@ -165,8 +177,7 @@ class IFFFile(object): def __getitem__(self, id_): """Get a chunk from the IFF file""" - if not isinstance(id_, text_type): - id_ = id_.decode('ascii') + assert isinstance(id_, text_type) if not is_valid_chunk_id(id_): raise KeyError("AIFF key must be four ASCII characters.") @@ -175,13 +186,12 @@ class IFFFile(object): return self.__chunks[id_] except KeyError: raise 
KeyError( - "%r has no %r chunk" % (self.__fileobj.name, id_)) + "%r has no %r chunk" % (self.__fileobj, id_)) def __delitem__(self, id_): """Remove a chunk from the IFF file""" - if not isinstance(id_, text_type): - id_ = id_.decode('ascii') + assert isinstance(id_, text_type) if not is_valid_chunk_id(id_): raise KeyError("AIFF key must be four ASCII characters.") @@ -191,8 +201,7 @@ class IFFFile(object): def insert_chunk(self, id_): """Insert a new chunk at the end of the IFF file""" - if not isinstance(id_, text_type): - id_ = id_.decode('ascii') + assert isinstance(id_, text_type) if not is_valid_chunk_id(id_): raise KeyError("AIFF key must be four ASCII characters.") @@ -201,24 +210,25 @@ class IFFFile(object): self.__fileobj.write(pack('>4si', id_.ljust(4).encode('ascii'), 0)) self.__fileobj.seek(self.__next_offset) chunk = IFFChunk(self.__fileobj, self[u'FORM']) - self[u'FORM'].resize(self[u'FORM'].data_size + chunk.size) + self[u'FORM']._update_size(self[u'FORM'].data_size + chunk.size) self.__chunks[id_] = chunk self.__next_offset = chunk.offset + chunk.size class AIFFInfo(StreamInfo): - """AIFF audio stream information. + """AIFFInfo() + + AIFF audio stream information. 
Information is parsed from the COMM chunk of the AIFF file - Useful attributes: - - * length -- audio length, in seconds - * bitrate -- audio bitrate, in bits per second - * channels -- The number of audio channels - * sample_rate -- audio sample rate, in Hz - * sample_size -- The audio sample size + Attributes: + length (`float`): audio length, in seconds + bitrate (`int`): audio bitrate, in bits per second + channels (`int`): The number of audio channels + sample_rate (`int`): audio sample rate, in Hz + sample_size (`int`): The audio sample size """ length = 0 @@ -226,16 +236,21 @@ class AIFFInfo(StreamInfo): channels = 0 sample_rate = 0 + @convert_error(IOError, error) def __init__(self, fileobj): + """Raises error""" + iff = IFFFile(fileobj) try: common_chunk = iff[u'COMM'] except KeyError as e: raise error(str(e)) - common_chunk.read() + data = common_chunk.read() + if len(data) < 18: + raise error - info = struct.unpack('>hLh10s', common_chunk.data[:18]) + info = struct.unpack('>hLh10s', data[:18]) channels, frame_count, sample_size, sample_rate = info self.sample_rate = int(read_float(sample_rate)) @@ -245,86 +260,78 @@ class AIFFInfo(StreamInfo): self.length = frame_count / float(self.sample_rate) def pprint(self): - return "%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % ( + return u"%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % ( self.channels, self.bitrate, self.sample_rate, self.length) class _IFFID3(ID3): """A AIFF file with ID3v2 tags""" - def _load_header(self): + def _pre_load_header(self, fileobj): try: - self._fileobj.seek(IFFFile(self._fileobj)[u'ID3'].data_offset) + fileobj.seek(IFFFile(fileobj)[u'ID3'].data_offset) except (InvalidChunk, KeyError): - raise ID3Error() - super(_IFFID3, self)._load_header() + raise ID3NoHeaderError("No ID3 chunk") - def save(self, filename=None, v2_version=4, v23_sep='/'): + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, v2_version=4, v23_sep='/', padding=None): """Save 
ID3v2 data to the AIFF file""" - framedata = self._prepare_framedata(v2_version, v23_sep) - framesize = len(framedata) + fileobj = filething.fileobj - if filename is None: - filename = self.filename - - # Unlike the parent ID3.save method, we won't save to a blank file - # since we would have to construct a empty AIFF file - fileobj = open(filename, 'rb+') iff_file = IFFFile(fileobj) + if u'ID3' not in iff_file: + iff_file.insert_chunk(u'ID3') + + chunk = iff_file[u'ID3'] + try: - if u'ID3' not in iff_file: - iff_file.insert_chunk(u'ID3') + data = self._prepare_data( + fileobj, chunk.data_offset, chunk.data_size, v2_version, + v23_sep, padding) + except ID3Error as e: + reraise(error, e, sys.exc_info()[2]) - chunk = iff_file[u'ID3'] - fileobj.seek(chunk.data_offset) + new_size = len(data) + new_size += new_size % 2 # pad byte + assert new_size % 2 == 0 + chunk.resize(new_size) + data += (new_size - len(data)) * b'\x00' + assert new_size == len(data) + chunk.write(data) - header = fileobj.read(10) - header = self._prepare_id3_header(header, framesize, v2_version) - header, new_size, _ = header - - data = header + framedata + (b'\x00' * (new_size - framesize)) - - # Include ID3 header size in 'new_size' calculation - new_size += 10 - - # Expand the chunk if necessary, including pad byte - if new_size > chunk.size: - insert_at = chunk.offset + chunk.size - insert_size = new_size - chunk.size + new_size % 2 - insert_bytes(fileobj, insert_size, insert_at) - chunk.resize(new_size) - - fileobj.seek(chunk.data_offset) - fileobj.write(data) - finally: - fileobj.close() - - def delete(self, filename=None): + @loadfile(writable=True) + def delete(self, filething): """Completely removes the ID3 chunk from the AIFF file""" - if filename is None: - filename = self.filename - delete(filename) + delete(filething) self.clear() -def delete(filename): +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): """Completely removes the ID3 chunk 
from the AIFF file""" - with open(filename, "rb+") as file_: - try: - del IFFFile(file_)[u'ID3'] - except KeyError: - pass + try: + del IFFFile(filething.fileobj)[u'ID3'] + except KeyError: + pass class AIFF(FileType): - """An AIFF audio file. + """AIFF(filething) - :ivar info: :class:`AIFFInfo` - :ivar tags: :class:`ID3` + An AIFF audio file. + + Arguments: + filething (filething) + + Attributes: + tags (`mutagen.id3.ID3`) + info (`AIFFInfo`) """ _mimes = ["audio/aiff", "audio/x-aiff"] @@ -343,20 +350,24 @@ class AIFF(FileType): else: raise error("an ID3 tag already exists") - def load(self, filename, **kwargs): + @convert_error(IOError, error) + @loadfile() + def load(self, filething, **kwargs): """Load stream and tag information from a file.""" - self.filename = filename + + fileobj = filething.fileobj try: - self.tags = _IFFID3(filename, **kwargs) - except ID3Error: + self.tags = _IFFID3(fileobj, **kwargs) + except ID3NoHeaderError: self.tags = None + except ID3Error as e: + raise error(e) + else: + self.tags.filename = self.filename - try: - fileobj = open(filename, "rb") - self.info = AIFFInfo(fileobj) - finally: - fileobj.close() + fileobj.seek(0, 0) + self.info = AIFFInfo(fileobj) Open = AIFF diff --git a/lib/mutagen/apev2.py b/lib/mutagen/apev2.py old mode 100644 new mode 100755 index 6eee43ce..f4d5c5da --- a/lib/mutagen/apev2.py +++ b/lib/mutagen/apev2.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """APEv2 reading and writing. 
@@ -37,8 +37,8 @@ from collections import MutableSequence from ._compat import (cBytesIO, PY3, text_type, PY2, reraise, swap_to_string, xrange) from mutagen import Metadata, FileType, StreamInfo -from mutagen._util import (DictMixin, cdata, delete_bytes, total_ordering, - MutagenError) +from mutagen._util import DictMixin, cdata, delete_bytes, total_ordering, \ + MutagenError, loadfile, convert_error, seek_end, get_size def is_valid_apev2_key(key): @@ -61,26 +61,26 @@ def is_valid_apev2_key(key): # 1: Item contains binary information # 2: Item is a locator of external stored information [e.g. URL] # 3: reserved" -TEXT, BINARY, EXTERNAL = range(3) +TEXT, BINARY, EXTERNAL = xrange(3) HAS_HEADER = 1 << 31 HAS_NO_FOOTER = 1 << 30 IS_HEADER = 1 << 29 -class error(IOError, MutagenError): +class error(MutagenError): pass -class APENoHeaderError(error, ValueError): +class APENoHeaderError(error): pass -class APEUnsupportedVersionError(error, ValueError): +class APEUnsupportedVersionError(error): pass -class APEBadItemError(error, ValueError): +class APEBadItemError(error): pass @@ -103,6 +103,8 @@ class _APEv2Data(object): is_at_start = False def __init__(self, fileobj): + """Raises IOError and apev2.error""" + self.__find_metadata(fileobj) if self.header is None: @@ -137,6 +139,8 @@ class _APEv2Data(object): # Check for an APEv2 tag followed by an ID3v1 tag at the end. 
try: + if get_size(fileobj) < 128: + raise IOError fileobj.seek(-128, 2) if fileobj.read(3) == b"TAG": @@ -173,11 +177,18 @@ class _APEv2Data(object): self.header = 0 def __fill_missing(self, fileobj): + """Raises IOError and apev2.error""" + fileobj.seek(self.metadata + 8) - self.version = fileobj.read(4) - self.size = cdata.uint_le(fileobj.read(4)) - self.items = cdata.uint_le(fileobj.read(4)) - self.flags = cdata.uint_le(fileobj.read(4)) + + data = fileobj.read(16) + if len(data) != 16: + raise error + + self.version = data[:4] + self.size = cdata.uint32_le(data[4:8]) + self.items = cdata.uint32_le(data[8:12]) + self.flags = cdata.uint32_le(data[12:]) if self.header is not None: self.data = self.header + 32 @@ -256,7 +267,9 @@ class _CIDictProxy(DictMixin): class APEv2(_CIDictProxy, Metadata): - """A file with an APEv2 tag. + """APEv2(filething=None) + + A file with an APEv2 tag. ID3v1 tags are silently ignored and overwritten. """ @@ -269,15 +282,16 @@ class APEv2(_CIDictProxy, Metadata): items = sorted(self.items()) return u"\n".join(u"%s=%s" % (k, v.pprint()) for k, v in items) - def load(self, filename): - """Load tags from a filename.""" + @convert_error(IOError, error) + @loadfile() + def load(self, filething): + """Load tags from a filename. 
+ + Raises apev2.error + """ + + data = _APEv2Data(filething.fileobj) - self.filename = filename - fileobj = open(filename, "rb") - try: - data = _APEv2Data(fileobj) - finally: - fileobj.close() if data.tag: self.clear() self.__parse_tag(data.tag, data.items) @@ -285,33 +299,45 @@ class APEv2(_CIDictProxy, Metadata): raise APENoHeaderError("No APE tag found") def __parse_tag(self, tag, count): + """Raises IOError and APEBadItemError""" + fileobj = cBytesIO(tag) for i in xrange(count): - size_data = fileobj.read(4) + tag_data = fileobj.read(8) # someone writes wrong item counts - if not size_data: + if not tag_data: break - size = cdata.uint_le(size_data) - flags = cdata.uint_le(fileobj.read(4)) + if len(tag_data) != 8: + raise error + size = cdata.uint32_le(tag_data[:4]) + flags = cdata.uint32_le(tag_data[4:8]) # Bits 1 and 2 bits are flags, 0-3 # Bit 0 is read/write flag, ignored kind = (flags & 6) >> 1 if kind == 3: raise APEBadItemError("value type must be 0, 1, or 2") + key = value = fileobj.read(1) + if not key: + raise APEBadItemError while key[-1:] != b'\x00' and value: value = fileobj.read(1) + if not value: + raise APEBadItemError key += value if key[-1:] == b"\x00": key = key[:-1] + if PY3: try: key = key.decode("ascii") except UnicodeError as err: reraise(APEBadItemError, err, sys.exc_info()[2]) value = fileobj.read(size) + if len(value) != size: + raise APEBadItemError value = _get_value_type(kind)._new(value) @@ -391,7 +417,9 @@ class APEv2(_CIDictProxy, Metadata): super(APEv2, self).__setitem__(key, value) - def save(self, filename=None): + @convert_error(IOError, error) + @loadfile(writable=True, create=True) + def save(self, filething): """Save changes to a file. If no filename is given, the one most recently loaded is used. @@ -400,11 +428,8 @@ class APEv2(_CIDictProxy, Metadata): a header and a footer. 
""" - filename = filename or self.filename - try: - fileobj = open(filename, "r+b") - except IOError: - fileobj = open(filename, "w+b") + fileobj = filething.fileobj + data = _APEv2Data(fileobj) if data.is_at_start: @@ -453,32 +478,41 @@ class APEv2(_CIDictProxy, Metadata): footer += b"\0" * 8 fileobj.write(footer) - fileobj.close() - def delete(self, filename=None): + @convert_error(IOError, error) + @loadfile(writable=True) + def delete(self, filething): """Remove tags from a file.""" - filename = filename or self.filename - fileobj = open(filename, "r+b") - try: - data = _APEv2Data(fileobj) - if data.start is not None and data.size is not None: - delete_bytes(fileobj, data.end - data.start, data.start) - finally: - fileobj.close() + fileobj = filething.fileobj + data = _APEv2Data(fileobj) + if data.start is not None and data.size is not None: + delete_bytes(fileobj, data.end - data.start, data.start) self.clear() Open = APEv2 -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """delete(filething) + + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ try: - APEv2(filename).delete() + t = APEv2(filething) except APENoHeaderError: - pass + return + filething.fileobj.seek(0) + t.delete(filething) def _get_value_type(kind): @@ -679,6 +713,15 @@ class APEExtValue(_APEUtf8Value): class APEv2File(FileType): + """APEv2File(filething) + + Arguments: + filething (filething) + + Attributes: + tags (`APEv2`) + """ + class _Info(StreamInfo): length = 0 bitrate = 0 @@ -690,11 +733,18 @@ class APEv2File(FileType): def pprint(): return u"Unknown format with APEv2 tag." 
- def load(self, filename): - self.filename = filename - self.info = self._Info(open(filename, "rb")) + @loadfile() + def load(self, filething): + fileobj = filething.fileobj + + self.info = self._Info(fileobj) try: - self.tags = APEv2(filename) + fileobj.seek(0, 0) + except IOError as e: + raise error(e) + + try: + self.tags = APEv2(fileobj) except APENoHeaderError: self.tags = None @@ -702,13 +752,13 @@ class APEv2File(FileType): if self.tags is None: self.tags = APEv2() else: - raise ValueError("%r already has tags: %r" % (self, self.tags)) + raise error("%r already has tags: %r" % (self, self.tags)) @staticmethod def score(filename, fileobj, header): try: - fileobj.seek(-160, 2) + seek_end(fileobj, 160) + footer = fileobj.read() except IOError: - fileobj.seek(0) - footer = fileobj.read() + return -1 return ((b"APETAGEX" in footer) - header.startswith(b"ID3")) diff --git a/lib/mutagen/asf.py b/lib/mutagen/asf.py deleted file mode 100644 index 7cb7910d..00000000 --- a/lib/mutagen/asf.py +++ /dev/null @@ -1,862 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright (C) 2005-2006 Joe Wreschnig -# Copyright (C) 2006-2007 Lukas Lalinsky - -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
- -"""Read and write ASF (Window Media Audio) files.""" - -__all__ = ["ASF", "Open"] - -import sys -import struct -from mutagen import FileType, Metadata, StreamInfo -from mutagen._util import (insert_bytes, delete_bytes, DictMixin, - total_ordering, MutagenError) -from ._compat import swap_to_string, text_type, PY2, string_types, reraise, \ - xrange, long_, PY3 - - -class error(IOError, MutagenError): - pass - - -class ASFError(error): - pass - - -class ASFHeaderError(error): - pass - - -class ASFInfo(StreamInfo): - """ASF stream information.""" - - def __init__(self): - self.length = 0.0 - self.sample_rate = 0 - self.bitrate = 0 - self.channels = 0 - - def pprint(self): - s = "Windows Media Audio %d bps, %s Hz, %d channels, %.2f seconds" % ( - self.bitrate, self.sample_rate, self.channels, self.length) - return s - - -class ASFTags(list, DictMixin, Metadata): - """Dictionary containing ASF attributes.""" - - def pprint(self): - return "\n".join("%s=%s" % (k, v) for k, v in self) - - def __getitem__(self, key): - """A list of values for the key. - - This is a copy, so comment['title'].append('a title') will not - work. - - """ - - # PY3 only - if isinstance(key, slice): - return list.__getitem__(self, key) - - values = [value for (k, value) in self if k == key] - if not values: - raise KeyError(key) - else: - return values - - def __delitem__(self, key): - """Delete all values associated with the key.""" - - # PY3 only - if isinstance(key, slice): - return list.__delitem__(self, key) - - to_delete = [x for x in self if x[0] == key] - if not to_delete: - raise KeyError(key) - else: - for k in to_delete: - self.remove(k) - - def __contains__(self, key): - """Return true if the key has any values.""" - for k, value in self: - if k == key: - return True - else: - return False - - def __setitem__(self, key, values): - """Set a key's value or values. - - Setting a value overwrites all old ones. 
The value may be a - list of Unicode or UTF-8 strings, or a single Unicode or UTF-8 - string. - - """ - - # PY3 only - if isinstance(key, slice): - return list.__setitem__(self, key, values) - - if not isinstance(values, list): - values = [values] - - to_append = [] - for value in values: - if not isinstance(value, ASFBaseAttribute): - if isinstance(value, string_types): - value = ASFUnicodeAttribute(value) - elif PY3 and isinstance(value, bytes): - value = ASFByteArrayAttribute(value) - elif isinstance(value, bool): - value = ASFBoolAttribute(value) - elif isinstance(value, int): - value = ASFDWordAttribute(value) - elif isinstance(value, long_): - value = ASFQWordAttribute(value) - else: - raise TypeError("Invalid type %r" % type(value)) - to_append.append((key, value)) - - try: - del(self[key]) - except KeyError: - pass - - self.extend(to_append) - - def keys(self): - """Return all keys in the comment.""" - return self and set(next(iter(zip(*self)))) - - def as_dict(self): - """Return a copy of the comment data in a real dict.""" - d = {} - for key, value in self: - d.setdefault(key, []).append(value) - return d - - -class ASFBaseAttribute(object): - """Generic attribute.""" - TYPE = None - - def __init__(self, value=None, data=None, language=None, - stream=None, **kwargs): - self.language = language - self.stream = stream - if data: - self.value = self.parse(data, **kwargs) - else: - if value is None: - # we used to support not passing any args and instead assign - # them later, keep that working.. - self.value = None - else: - self.value = self._validate(value) - - def _validate(self, value): - """Raises TypeError or ValueError in case the user supplied value - isn't valid. 
- """ - - return value - - def data_size(self): - raise NotImplementedError - - def __repr__(self): - name = "%s(%r" % (type(self).__name__, self.value) - if self.language: - name += ", language=%d" % self.language - if self.stream: - name += ", stream=%d" % self.stream - name += ")" - return name - - def render(self, name): - name = name.encode("utf-16-le") + b"\x00\x00" - data = self._render() - return (struct.pack(" 0: - texts.append(data[pos:end].decode("utf-16-le").strip(u"\x00")) - else: - texts.append(None) - pos = end - - for key, value in zip(self.NAMES, texts): - if value is not None: - value = ASFUnicodeAttribute(value=value) - asf._tags.setdefault(self.GUID, []).append((key, value)) - - def render(self, asf): - def render_text(name): - value = asf.to_content_description.get(name) - if value is not None: - return text_type(value).encode("utf-16-le") + b"\x00\x00" - else: - return b"" - - texts = [render_text(x) for x in self.NAMES] - data = struct.pack(" 0xFFFF or value.TYPE == GUID) - can_cont_desc = value.TYPE == UNICODE - - if library_only or value.language is not None: - self.to_metadata_library.append((name, value)) - elif value.stream is not None: - if name not in self.to_metadata: - self.to_metadata[name] = value - else: - self.to_metadata_library.append((name, value)) - elif name in ContentDescriptionObject.NAMES: - if name not in self.to_content_description and can_cont_desc: - self.to_content_description[name] = value - else: - self.to_metadata_library.append((name, value)) - else: - if name not in self.to_extended_content_description: - self.to_extended_content_description[name] = value - else: - self.to_metadata_library.append((name, value)) - - # Add missing objects - if not self.content_description_obj: - self.content_description_obj = \ - ContentDescriptionObject() - self.objects.append(self.content_description_obj) - if not self.extended_content_description_obj: - self.extended_content_description_obj = \ - 
ExtendedContentDescriptionObject() - self.objects.append(self.extended_content_description_obj) - if not self.header_extension_obj: - self.header_extension_obj = \ - HeaderExtensionObject() - self.objects.append(self.header_extension_obj) - if not self.metadata_obj: - self.metadata_obj = \ - MetadataObject() - self.header_extension_obj.objects.append(self.metadata_obj) - if not self.metadata_library_obj: - self.metadata_library_obj = \ - MetadataLibraryObject() - self.header_extension_obj.objects.append(self.metadata_library_obj) - - # Render the header - data = b"".join([obj.render(self) for obj in self.objects]) - data = (HeaderObject.GUID + - struct.pack(" self.size: - insert_bytes(fileobj, size - self.size, self.size) - if size < self.size: - delete_bytes(fileobj, self.size - size, 0) - fileobj.seek(0) - fileobj.write(data) - - self.size = size - self.num_objects = len(self.objects) - - def __read_file(self, fileobj): - header = fileobj.read(30) - if len(header) != 30 or header[:16] != HeaderObject.GUID: - raise ASFHeaderError("Not an ASF file.") - - self.extended_content_description_obj = None - self.content_description_obj = None - self.header_extension_obj = None - self.metadata_obj = None - self.metadata_library_obj = None - - self.size, self.num_objects = struct.unpack(" 0xFFFF or value.TYPE == GUID) + can_cont_desc = value.TYPE == UNICODE + + if library_only or value.language is not None: + self.to_metadata_library.append((name, value)) + elif value.stream is not None: + if name not in self.to_metadata: + self.to_metadata[name] = value + else: + self.to_metadata_library.append((name, value)) + elif name in ContentDescriptionObject.NAMES: + if name not in self.to_content_description and can_cont_desc: + self.to_content_description[name] = value + else: + self.to_metadata_library.append((name, value)) + else: + if name not in self.to_extended_content_description: + self.to_extended_content_description[name] = value + else: + 
self.to_metadata_library.append((name, value)) + + # Add missing objects + header = self._header + if header.get_child(ContentDescriptionObject.GUID) is None: + header.objects.append(ContentDescriptionObject()) + if header.get_child(ExtendedContentDescriptionObject.GUID) is None: + header.objects.append(ExtendedContentDescriptionObject()) + header_ext = header.get_child(HeaderExtensionObject.GUID) + if header_ext is None: + header_ext = HeaderExtensionObject() + header.objects.append(header_ext) + if header_ext.get_child(MetadataObject.GUID) is None: + header_ext.objects.append(MetadataObject()) + if header_ext.get_child(MetadataLibraryObject.GUID) is None: + header_ext.objects.append(MetadataLibraryObject()) + + fileobj = filething.fileobj + # Render to file + old_size = header.parse_size(fileobj)[0] + data = header.render_full(self, fileobj, old_size, padding) + size = len(data) + resize_bytes(fileobj, old_size, size, 0) + fileobj.seek(0) + fileobj.write(data) + + def add_tags(self): + raise ASFError + + @loadfile(writable=True) + def delete(self, filething): + """delete(filething=None) + + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ + + self.tags.clear() + self.save(filething, padding=lambda x: 0) + + @staticmethod + def score(filename, fileobj, header): + return header.startswith(HeaderObject.GUID) * 2 + +Open = ASF diff --git a/lib/mutagen/asf/_attrs.py b/lib/mutagen/asf/_attrs.py new file mode 100755 index 00000000..d8f304af --- /dev/null +++ b/lib/mutagen/asf/_attrs.py @@ -0,0 +1,439 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2005-2006 Joe Wreschnig +# Copyright (C) 2006-2007 Lukas Lalinsky +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +import sys +import struct + +from mutagen._compat import swap_to_string, text_type, PY2, reraise +from mutagen._util import total_ordering + +from ._util import ASFError + + +class ASFBaseAttribute(object): + """Generic attribute.""" + + TYPE = None + + _TYPES = {} + + value = None + """The Python value of this attribute (type depends on the class)""" + + language = None + """Language""" + + stream = None + """Stream""" + + def __init__(self, value=None, data=None, language=None, + stream=None, **kwargs): + self.language = language + self.stream = stream + if data: + self.value = self.parse(data, **kwargs) + else: + if value is None: + # we used to support not passing any args and instead assign + # them later, keep that working.. + self.value = None + else: + self.value = self._validate(value) + + @classmethod + def _register(cls, other): + cls._TYPES[other.TYPE] = other + return other + + @classmethod + def _get_type(cls, type_): + """Raises KeyError""" + + return cls._TYPES[type_] + + def _validate(self, value): + """Raises TypeError or ValueError in case the user supplied value + isn't valid. 
+ """ + + return value + + def data_size(self): + raise NotImplementedError + + def __repr__(self): + name = "%s(%r" % (type(self).__name__, self.value) + if self.language: + name += ", language=%d" % self.language + if self.stream: + name += ", stream=%d" % self.stream + name += ")" + return name + + def render(self, name): + name = name.encode("utf-16-le") + b"\x00\x00" + data = self._render() + return (struct.pack("" % ( + type(self).__name__, bytes2guid(self.GUID), self.objects) + + def pprint(self): + l = [] + l.append("%s(%s)" % (type(self).__name__, bytes2guid(self.GUID))) + for o in self.objects: + for e in o.pprint().splitlines(): + l.append(" " + e) + return "\n".join(l) + + +class UnknownObject(BaseObject): + """Unknown ASF object.""" + + def __init__(self, guid): + super(UnknownObject, self).__init__() + assert isinstance(guid, bytes) + self.GUID = guid + + +@BaseObject._register +class HeaderObject(BaseObject): + """ASF header.""" + + GUID = guid2bytes("75B22630-668E-11CF-A6D9-00AA0062CE6C") + + @classmethod + def parse_full(cls, asf, fileobj): + """Raises ASFHeaderError""" + + header = cls() + + remaining_header, num_objects = cls.parse_size(fileobj) + remaining_header -= 30 + + for i in xrange(num_objects): + obj_header_size = 24 + if remaining_header < obj_header_size: + raise ASFHeaderError("invalid header size") + data = fileobj.read(obj_header_size) + if len(data) != obj_header_size: + raise ASFHeaderError("truncated") + remaining_header -= obj_header_size + + guid, size = struct.unpack("<16sQ", data) + obj = BaseObject._get_object(guid) + + payload_size = size - obj_header_size + if remaining_header < payload_size: + raise ASFHeaderError("invalid object size") + remaining_header -= payload_size + + try: + data = fileobj.read(payload_size) + except OverflowError: + # read doesn't take 64bit values + raise ASFHeaderError("invalid header size") + if len(data) != payload_size: + raise ASFHeaderError("truncated") + + obj.parse(asf, data) + 
header.objects.append(obj) + + return header + + @classmethod + def parse_size(cls, fileobj): + """Returns (size, num_objects) + + Raises ASFHeaderError + """ + + header = fileobj.read(30) + if len(header) != 30 or header[:16] != HeaderObject.GUID: + raise ASFHeaderError("Not an ASF file.") + + return struct.unpack("= 0 + info = PaddingInfo(available - needed_size, content_size) + + # add padding + padding = info._get_padding(padding_func) + padding_obj.parse(asf, b"\x00" * padding) + data += padding_obj.render(asf) + num_objects += 1 + + data = (HeaderObject.GUID + + struct.pack(" 0: + texts.append(data[pos:end].decode("utf-16-le").strip(u"\x00")) + else: + texts.append(None) + pos = end + + for key, value in izip(self.NAMES, texts): + if value is not None: + value = ASFUnicodeAttribute(value=value) + asf._tags.setdefault(self.GUID, []).append((key, value)) + + def render(self, asf): + def render_text(name): + value = asf.to_content_description.get(name) + if value is not None: + return text_type(value).encode("utf-16-le") + b"\x00\x00" + else: + return b"" + + texts = [render_text(x) for x in self.NAMES] + data = struct.pack("= 0 + asf.info.length = max((length / 10000000.0) - (preroll / 1000.0), 0.0) + + +@BaseObject._register +class StreamPropertiesObject(BaseObject): + """Stream properties.""" + + GUID = guid2bytes("B7DC0791-A9B7-11CF-8EE6-00C00C205365") + + def parse(self, asf, data): + super(StreamPropertiesObject, self).parse(asf, data) + channels, sample_rate, bitrate = struct.unpack("H", int(s[19:23], 16)), + p(">Q", int(s[24:], 16))[2:], + ]) + + +def bytes2guid(s): + """Converts a serialized GUID to a text GUID""" + + assert isinstance(s, bytes) + + u = struct.unpack + v = [] + v.extend(u("HQ", s[8:10] + b"\x00\x00" + s[10:])) + return "%08X-%04X-%04X-%04X-%012X" % tuple(v) + + +# Names from http://windows.microsoft.com/en-za/windows7/c00d10d1-[0-9A-F]{1,4} +CODECS = { + 0x0000: u"Unknown Wave Format", + 0x0001: u"Microsoft PCM Format", + 0x0002: 
u"Microsoft ADPCM Format", + 0x0003: u"IEEE Float", + 0x0004: u"Compaq Computer VSELP", + 0x0005: u"IBM CVSD", + 0x0006: u"Microsoft CCITT A-Law", + 0x0007: u"Microsoft CCITT u-Law", + 0x0008: u"Microsoft DTS", + 0x0009: u"Microsoft DRM", + 0x000A: u"Windows Media Audio 9 Voice", + 0x000B: u"Windows Media Audio 10 Voice", + 0x000C: u"OGG Vorbis", + 0x000D: u"FLAC", + 0x000E: u"MOT AMR", + 0x000F: u"Nice Systems IMBE", + 0x0010: u"OKI ADPCM", + 0x0011: u"Intel IMA ADPCM", + 0x0012: u"Videologic MediaSpace ADPCM", + 0x0013: u"Sierra Semiconductor ADPCM", + 0x0014: u"Antex Electronics G.723 ADPCM", + 0x0015: u"DSP Solutions DIGISTD", + 0x0016: u"DSP Solutions DIGIFIX", + 0x0017: u"Dialogic OKI ADPCM", + 0x0018: u"MediaVision ADPCM", + 0x0019: u"Hewlett-Packard CU codec", + 0x001A: u"Hewlett-Packard Dynamic Voice", + 0x0020: u"Yamaha ADPCM", + 0x0021: u"Speech Compression SONARC", + 0x0022: u"DSP Group True Speech", + 0x0023: u"Echo Speech EchoSC1", + 0x0024: u"Ahead Inc. Audiofile AF36", + 0x0025: u"Audio Processing Technology APTX", + 0x0026: u"Ahead Inc. AudioFile AF10", + 0x0027: u"Aculab Prosody 1612", + 0x0028: u"Merging Technologies S.A. 
LRC", + 0x0030: u"Dolby Labs AC2", + 0x0031: u"Microsoft GSM 6.10", + 0x0032: u"Microsoft MSNAudio", + 0x0033: u"Antex Electronics ADPCME", + 0x0034: u"Control Resources VQLPC", + 0x0035: u"DSP Solutions Digireal", + 0x0036: u"DSP Solutions DigiADPCM", + 0x0037: u"Control Resources CR10", + 0x0038: u"Natural MicroSystems VBXADPCM", + 0x0039: u"Crystal Semiconductor IMA ADPCM", + 0x003A: u"Echo Speech EchoSC3", + 0x003B: u"Rockwell ADPCM", + 0x003C: u"Rockwell DigiTalk", + 0x003D: u"Xebec Multimedia Solutions", + 0x0040: u"Antex Electronics G.721 ADPCM", + 0x0041: u"Antex Electronics G.728 CELP", + 0x0042: u"Intel G.723", + 0x0043: u"Intel G.723.1", + 0x0044: u"Intel G.729 Audio", + 0x0045: u"Sharp G.726 Audio", + 0x0050: u"Microsoft MPEG-1", + 0x0052: u"InSoft RT24", + 0x0053: u"InSoft PAC", + 0x0055: u"MP3 - MPEG Layer III", + 0x0059: u"Lucent G.723", + 0x0060: u"Cirrus Logic", + 0x0061: u"ESS Technology ESPCM", + 0x0062: u"Voxware File-Mode", + 0x0063: u"Canopus Atrac", + 0x0064: u"APICOM G.726 ADPCM", + 0x0065: u"APICOM G.722 ADPCM", + 0x0066: u"Microsoft DSAT", + 0x0067: u"Microsoft DSAT Display", + 0x0069: u"Voxware Byte Aligned", + 0x0070: u"Voxware AC8", + 0x0071: u"Voxware AC10", + 0x0072: u"Voxware AC16", + 0x0073: u"Voxware AC20", + 0x0074: u"Voxware RT24 MetaVoice", + 0x0075: u"Voxware RT29 MetaSound", + 0x0076: u"Voxware RT29HW", + 0x0077: u"Voxware VR12", + 0x0078: u"Voxware VR18", + 0x0079: u"Voxware TQ40", + 0x007A: u"Voxware SC3", + 0x007B: u"Voxware SC3", + 0x0080: u"Softsound", + 0x0081: u"Voxware TQ60", + 0x0082: u"Microsoft MSRT24", + 0x0083: u"AT&T Labs G.729A", + 0x0084: u"Motion Pixels MVI MV12", + 0x0085: u"DataFusion Systems G.726", + 0x0086: u"DataFusion Systems GSM610", + 0x0088: u"Iterated Systems ISIAudio", + 0x0089: u"Onlive", + 0x008A: u"Multitude FT SX20", + 0x008B: u"Infocom ITS ACM G.721", + 0x008C: u"Convedia G.729", + 0x008D: u"Congruency Audio", + 0x0091: u"Siemens Business Communications SBC24", + 0x0092: u"Sonic Foundry Dolby 
AC3 SPDIF", + 0x0093: u"MediaSonic G.723", + 0x0094: u"Aculab Prosody 8KBPS", + 0x0097: u"ZyXEL ADPCM", + 0x0098: u"Philips LPCBB", + 0x0099: u"Studer Professional Audio AG Packed", + 0x00A0: u"Malden Electronics PHONYTALK", + 0x00A1: u"Racal Recorder GSM", + 0x00A2: u"Racal Recorder G720.a", + 0x00A3: u"Racal Recorder G723.1", + 0x00A4: u"Racal Recorder Tetra ACELP", + 0x00B0: u"NEC AAC", + 0x00FF: u"CoreAAC Audio", + 0x0100: u"Rhetorex ADPCM", + 0x0101: u"BeCubed Software IRAT", + 0x0111: u"Vivo G.723", + 0x0112: u"Vivo Siren", + 0x0120: u"Philips CELP", + 0x0121: u"Philips Grundig", + 0x0123: u"Digital G.723", + 0x0125: u"Sanyo ADPCM", + 0x0130: u"Sipro Lab Telecom ACELP.net", + 0x0131: u"Sipro Lab Telecom ACELP.4800", + 0x0132: u"Sipro Lab Telecom ACELP.8V3", + 0x0133: u"Sipro Lab Telecom ACELP.G.729", + 0x0134: u"Sipro Lab Telecom ACELP.G.729A", + 0x0135: u"Sipro Lab Telecom ACELP.KELVIN", + 0x0136: u"VoiceAge AMR", + 0x0140: u"Dictaphone G.726 ADPCM", + 0x0141: u"Dictaphone CELP68", + 0x0142: u"Dictaphone CELP54", + 0x0150: u"Qualcomm PUREVOICE", + 0x0151: u"Qualcomm HALFRATE", + 0x0155: u"Ring Zero Systems TUBGSM", + 0x0160: u"Windows Media Audio Standard", + 0x0161: u"Windows Media Audio 9 Standard", + 0x0162: u"Windows Media Audio 9 Professional", + 0x0163: u"Windows Media Audio 9 Lossless", + 0x0164: u"Windows Media Audio Pro over SPDIF", + 0x0170: u"Unisys NAP ADPCM", + 0x0171: u"Unisys NAP ULAW", + 0x0172: u"Unisys NAP ALAW", + 0x0173: u"Unisys NAP 16K", + 0x0174: u"Sycom ACM SYC008", + 0x0175: u"Sycom ACM SYC701 G725", + 0x0176: u"Sycom ACM SYC701 CELP54", + 0x0177: u"Sycom ACM SYC701 CELP68", + 0x0178: u"Knowledge Adventure ADPCM", + 0x0180: u"Fraunhofer IIS MPEG-2 AAC", + 0x0190: u"Digital Theater Systems DTS", + 0x0200: u"Creative Labs ADPCM", + 0x0202: u"Creative Labs FastSpeech8", + 0x0203: u"Creative Labs FastSpeech10", + 0x0210: u"UHER informatic GmbH ADPCM", + 0x0215: u"Ulead DV Audio", + 0x0216: u"Ulead DV Audio", + 0x0220: u"Quarterdeck", + 
0x0230: u"I-link Worldwide ILINK VC", + 0x0240: u"Aureal Semiconductor RAW SPORT", + 0x0249: u"Generic Passthru", + 0x0250: u"Interactive Products HSX", + 0x0251: u"Interactive Products RPELP", + 0x0260: u"Consistent Software CS2", + 0x0270: u"Sony SCX", + 0x0271: u"Sony SCY", + 0x0272: u"Sony ATRAC3", + 0x0273: u"Sony SPC", + 0x0280: u"Telum Audio", + 0x0281: u"Telum IA Audio", + 0x0285: u"Norcom Voice Systems ADPCM", + 0x0300: u"Fujitsu TOWNS SND", + 0x0350: u"Micronas SC4 Speech", + 0x0351: u"Micronas CELP833", + 0x0400: u"Brooktree BTV Digital", + 0x0401: u"Intel Music Coder", + 0x0402: u"Intel Audio", + 0x0450: u"QDesign Music", + 0x0500: u"On2 AVC0 Audio", + 0x0501: u"On2 AVC1 Audio", + 0x0680: u"AT&T Labs VME VMPCM", + 0x0681: u"AT&T Labs TPC", + 0x08AE: u"ClearJump Lightwave Lossless", + 0x1000: u"Olivetti GSM", + 0x1001: u"Olivetti ADPCM", + 0x1002: u"Olivetti CELP", + 0x1003: u"Olivetti SBC", + 0x1004: u"Olivetti OPR", + 0x1100: u"Lernout & Hauspie", + 0x1101: u"Lernout & Hauspie CELP", + 0x1102: u"Lernout & Hauspie SBC8", + 0x1103: u"Lernout & Hauspie SBC12", + 0x1104: u"Lernout & Hauspie SBC16", + 0x1400: u"Norris Communication", + 0x1401: u"ISIAudio", + 0x1500: u"AT&T Labs Soundspace Music Compression", + 0x1600: u"Microsoft MPEG ADTS AAC", + 0x1601: u"Microsoft MPEG RAW AAC", + 0x1608: u"Nokia MPEG ADTS AAC", + 0x1609: u"Nokia MPEG RAW AAC", + 0x181C: u"VoxWare MetaVoice RT24", + 0x1971: u"Sonic Foundry Lossless", + 0x1979: u"Innings Telecom ADPCM", + 0x1FC4: u"NTCSoft ALF2CD ACM", + 0x2000: u"Dolby AC3", + 0x2001: u"DTS", + 0x4143: u"Divio AAC", + 0x4201: u"Nokia Adaptive Multi-Rate", + 0x4243: u"Divio G.726", + 0x4261: u"ITU-T H.261", + 0x4263: u"ITU-T H.263", + 0x4264: u"ITU-T H.264", + 0x674F: u"Ogg Vorbis Mode 1", + 0x6750: u"Ogg Vorbis Mode 2", + 0x6751: u"Ogg Vorbis Mode 3", + 0x676F: u"Ogg Vorbis Mode 1+", + 0x6770: u"Ogg Vorbis Mode 2+", + 0x6771: u"Ogg Vorbis Mode 3+", + 0x7000: u"3COM NBX Audio", + 0x706D: u"FAAD AAC Audio", + 0x77A1: 
u"True Audio Lossless Audio", + 0x7A21: u"GSM-AMR CBR 3GPP Audio", + 0x7A22: u"GSM-AMR VBR 3GPP Audio", + 0xA100: u"Comverse Infosys G723.1", + 0xA101: u"Comverse Infosys AVQSBC", + 0xA102: u"Comverse Infosys SBC", + 0xA103: u"Symbol Technologies G729a", + 0xA104: u"VoiceAge AMR WB", + 0xA105: u"Ingenient Technologies G.726", + 0xA106: u"ISO/MPEG-4 Advanced Audio Coding (AAC)", + 0xA107: u"Encore Software Ltd's G.726", + 0xA108: u"ZOLL Medical Corporation ASAO", + 0xA109: u"Speex Voice", + 0xA10A: u"Vianix MASC Speech Compression", + 0xA10B: u"Windows Media 9 Spectrum Analyzer Output", + 0xA10C: u"Media Foundation Spectrum Analyzer Output", + 0xA10D: u"GSM 6.10 (Full-Rate) Speech", + 0xA10E: u"GSM 6.20 (Half-Rate) Speech", + 0xA10F: u"GSM 6.60 (Enchanced Full-Rate) Speech", + 0xA110: u"GSM 6.90 (Adaptive Multi-Rate) Speech", + 0xA111: u"GSM Adaptive Multi-Rate WideBand Speech", + 0xA112: u"Polycom G.722", + 0xA113: u"Polycom G.728", + 0xA114: u"Polycom G.729a", + 0xA115: u"Polycom Siren", + 0xA116: u"Global IP Sound ILBC", + 0xA117: u"Radio Time Time Shifted Radio", + 0xA118: u"Nice Systems ACA", + 0xA119: u"Nice Systems ADPCM", + 0xA11A: u"Vocord Group ITU-T G.721", + 0xA11B: u"Vocord Group ITU-T G.726", + 0xA11C: u"Vocord Group ITU-T G.722.1", + 0xA11D: u"Vocord Group ITU-T G.728", + 0xA11E: u"Vocord Group ITU-T G.729", + 0xA11F: u"Vocord Group ITU-T G.729a", + 0xA120: u"Vocord Group ITU-T G.723.1", + 0xA121: u"Vocord Group LBC", + 0xA122: u"Nice G.728", + 0xA123: u"France Telecom G.729 ACM Audio", + 0xA124: u"CODIAN Audio", + 0xCC12: u"Intel YUV12 Codec", + 0xCFCC: u"Digital Processing Systems Perception Motion JPEG", + 0xD261: u"DEC H.261", + 0xD263: u"DEC H.263", + 0xFFFE: u"Extensible Wave Format", + 0xFFFF: u"Unregistered", +} diff --git a/lib/mutagen/dsf.py b/lib/mutagen/dsf.py new file mode 100755 index 00000000..ed5faae2 --- /dev/null +++ b/lib/mutagen/dsf.py @@ -0,0 +1,358 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2017 Boris Pruessmann +# +# This 
program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""Read and write DSF audio stream information and tags.""" + + +import sys +import struct + +from ._compat import cBytesIO, reraise, endswith + +from mutagen import FileType, StreamInfo +from mutagen._util import cdata, MutagenError, loadfile, convert_error +from mutagen.id3 import ID3 +from mutagen.id3._util import ID3NoHeaderError, error as ID3Error + + +__all__ = ["DSF", "Open", "delete"] + + +class error(MutagenError): + pass + + +class DSFChunk(object): + """A generic chunk of a DSFFile.""" + + chunk_offset = 0 + chunk_header = " " + chunk_size = -1 + + def __init__(self, fileobj, create=False): + self.fileobj = fileobj + + if not create: + self.chunk_offset = fileobj.tell() + self.load() + + def load(self): + raise NotImplementedError + + def write(self): + raise NotImplementedError + + +class DSDChunk(DSFChunk): + """Represents the first chunk of a DSF file""" + + CHUNK_SIZE = 28 + + total_size = 0 + offset_metdata_chunk = 0 + + def __init__(self, fileobj, create=False): + super(DSDChunk, self).__init__(fileobj, create) + + if create: + self.chunk_header = b"DSD " + self.chunk_size = DSDChunk.CHUNK_SIZE + + def load(self): + data = self.fileobj.read(DSDChunk.CHUNK_SIZE) + if len(data) != DSDChunk.CHUNK_SIZE: + raise error("DSF chunk truncated") + + self.chunk_header = data[0:4] + if self.chunk_header != b"DSD ": + raise error("DSF dsd header not found") + + self.chunk_size = cdata.ulonglong_le(data[4:12]) + if self.chunk_size != DSDChunk.CHUNK_SIZE: + raise error("DSF dsd header size mismatch") + + self.total_size = cdata.ulonglong_le(data[12:20]) + self.offset_metdata_chunk = cdata.ulonglong_le(data[20:28]) + + def write(self): + f = cBytesIO() + f.write(self.chunk_header) + f.write(struct.pack(" u'\x7f': - enc = 
3 - break + enc = 0 + # Store 8859-1 if we can, per MusicBrainz spec. + for v in value: + if v and max(v) > u'\x7f': + enc = 3 + break - id3.add(mutagen.id3.TXXX(encoding=enc, text=value, desc=desc)) - else: - frame.text = value + id3.add(mutagen.id3.TXXX(encoding=enc, text=value, desc=desc)) def deleter(id3, key): del(id3[frameid]) @@ -175,10 +172,30 @@ class EasyID3(DictMixin, Metadata): load = property(lambda s: s.__id3.load, lambda s, v: setattr(s.__id3, 'load', v)) - def save(self, *args, **kwargs): - # ignore v2_version until we support 2.3 here - kwargs.pop("v2_version", None) - self.__id3.save(*args, **kwargs) + @loadfile(writable=True, create=True) + def save(self, filething, v1=1, v2_version=4, v23_sep='/', padding=None): + """save(filething=None, v1=1, v2_version=4, v23_sep='/', padding=None) + + Save changes to a file. + See :meth:`mutagen.id3.ID3.save` for more info. + """ + + if v2_version == 3: + # EasyID3 only works with v2.4 frames, so update_to_v23() would + # break things. We have to save a shallow copy of all tags + # and restore it after saving. Due to CHAP/CTOC copying has + # to be done recursively implemented in ID3Tags. 
+ backup = self.__id3._copy() + try: + self.__id3.update_to_v23() + self.__id3.save( + filething, v1=v1, v2_version=v2_version, v23_sep=v23_sep, + padding=padding) + finally: + self.__id3._restore(backup) + else: + self.__id3.save(filething, v1=v1, v2_version=v2_version, + v23_sep=v23_sep, padding=padding) delete = property(lambda s: s.__id3.delete, lambda s, v: setattr(s.__id3, 'delete', v)) @@ -190,30 +207,27 @@ class EasyID3(DictMixin, Metadata): lambda s, fn: setattr(s.__id3, 'size', s)) def __getitem__(self, key): - key = key.lower() - func = dict_match(self.Get, key, self.GetFallback) + func = dict_match(self.Get, key.lower(), self.GetFallback) if func is not None: return func(self.__id3, key) else: raise EasyID3KeyError("%r is not a valid key" % key) def __setitem__(self, key, value): - key = key.lower() if PY2: if isinstance(value, basestring): value = [value] else: if isinstance(value, text_type): value = [value] - func = dict_match(self.Set, key, self.SetFallback) + func = dict_match(self.Set, key.lower(), self.SetFallback) if func is not None: return func(self.__id3, key, value) else: raise EasyID3KeyError("%r is not a valid key" % key) def __delitem__(self, key): - key = key.lower() - func = dict_match(self.Delete, key, self.DeleteFallback) + func = dict_match(self.Delete, key.lower(), self.DeleteFallback) if func is not None: return func(self.__id3, key) else: @@ -469,7 +483,7 @@ for frameid, key in iteritems({ "TIT2": "title", "TIT3": "version", "TPE1": "artist", - "TPE2": "performer", + "TPE2": "albumartist", "TPE3": "conductor", "TPE4": "arranger", "TPOS": "discnumber", @@ -518,6 +532,7 @@ for desc, key in iteritems({ u"MusicBrainz Disc Id": "musicbrainz_discid", u"ASIN": "asin", u"ALBUMARTISTSORT": "albumartistsort", + u"PERFORMER": "performer", u"BARCODE": "barcode", u"CATALOGNUMBER": "catalognumber", u"MusicBrainz Release Track Id": "musicbrainz_releasetrackid", @@ -530,5 +545,15 @@ for desc, key in iteritems({ class EasyID3FileType(ID3FileType): 
- """Like ID3FileType, but uses EasyID3 for tags.""" + """EasyID3FileType(filething=None) + + Like ID3FileType, but uses EasyID3 for tags. + + Arguments: + filething (filething) + + Attributes: + tags (`EasyID3`) + """ + ID3 = EasyID3 diff --git a/lib/mutagen/easymp4.py b/lib/mutagen/easymp4.py old mode 100644 new mode 100755 index 627c0e28..98530c62 --- a/lib/mutagen/easymp4.py +++ b/lib/mutagen/easymp4.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2009 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. -from mutagen import Metadata +from mutagen import Tags from mutagen._util import DictMixin, dict_match from mutagen.mp4 import MP4, MP4Tags, error, delete from ._compat import PY2, text_type, PY3 @@ -19,8 +19,10 @@ class EasyMP4KeyError(error, KeyError, ValueError): pass -class EasyMP4Tags(DictMixin, Metadata): - """A file with MPEG-4 iTunes metadata. +class EasyMP4Tags(DictMixin, Tags): + """EasyMP4Tags() + + A file with MPEG-4 iTunes metadata. Like Vorbis comments, EasyMP4Tags keys are case-insensitive ASCII strings, and values are a list of Unicode strings (and these lists @@ -40,6 +42,7 @@ class EasyMP4Tags(DictMixin, Metadata): self.load = self.__mp4.load self.save = self.__mp4.save self.delete = self.__mp4.delete + self._padding = self.__mp4._padding filename = property(lambda s: s.__mp4.filename, lambda s, fn: setattr(s.__mp4, 'filename', fn)) @@ -267,11 +270,14 @@ for name, key in { class EasyMP4(MP4): - """Like :class:`MP4 `, - but uses :class:`EasyMP4Tags` for tags. 
+ """EasyMP4(filelike) - :ivar info: :class:`MP4Info ` - :ivar tags: :class:`EasyMP4Tags` + Like :class:`MP4 `, but uses :class:`EasyMP4Tags` for + tags. + + Attributes: + info (`mutagen.mp4.MP4Info`) + tags (`EasyMP4Tags`) """ MP4Tags = EasyMP4Tags diff --git a/lib/mutagen/flac.py b/lib/mutagen/flac.py old mode 100644 new mode 100755 index 50a8f9d5..3ce27dbb --- a/lib/mutagen/flac.py +++ b/lib/mutagen/flac.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write FLAC Vorbis comments and stream information. @@ -26,13 +26,15 @@ import struct from ._vorbis import VCommentDict import mutagen -from ._compat import cBytesIO, endswith, chr_ -from mutagen._util import insert_bytes, MutagenError -from mutagen.id3 import BitPaddedInt +from ._compat import cBytesIO, endswith, chr_, xrange +from mutagen._util import resize_bytes, MutagenError, get_size, loadfile, \ + convert_error +from mutagen._tags import PaddingInfo +from mutagen.id3._util import BitPaddedInt from functools import reduce -class error(IOError, MutagenError): +class error(MutagenError): pass @@ -56,7 +58,8 @@ class StrictFileObject(object): def __init__(self, fileobj): self._fileobj = fileobj - for m in ["close", "tell", "seek", "write", "name"]: + for m in ["close", "tell", "seek", "write", "name", "flush", + "truncate"]: if hasattr(fileobj, m): setattr(self, m, getattr(fileobj, m)) @@ -78,11 +81,19 @@ class MetadataBlock(object): blocks, and also as a container for data blobs of unknown blocks. 
Attributes: - - * data -- raw binary data for this block + data (`bytes`): raw binary data for this block """ _distrust_size = False + """For block types setting this, we don't trust the size field and + use the size of the content instead.""" + + _invalid_overflow_size = -1 + """In case the real size was bigger than what is representable by the + 24 bit size field, we save the wrong specified size here. This can + only be set if _distrust_size is True""" + + _MAX_SIZE = 2 ** 24 - 1 def __init__(self, data): """Parse the given data string or file-like as a metadata block. @@ -103,41 +114,64 @@ class MetadataBlock(object): def write(self): return self.data - @staticmethod - def writeblocks(blocks): - """Render metadata block as a byte string.""" - data = [] - codes = [[block.code, block.write()] for block in blocks] - codes[-1][0] |= 128 - for code, datum in codes: - byte = chr_(code) - if len(datum) > 2 ** 24: - raise error("block is too long to write") - length = struct.pack(">I", len(datum))[-3:] - data.append(byte + length + datum) - return b"".join(data) + @classmethod + def _writeblock(cls, block, is_last=False): + """Returns the block content + header. - @staticmethod - def group_padding(blocks): - """Consolidate FLAC padding metadata blocks. - - The overall size of the rendered blocks does not change, so - this adds several bytes of padding for each merged block. + Raises error. """ - paddings = [b for b in blocks if isinstance(b, Padding)] - for p in paddings: - blocks.remove(p) - # total padding size is the sum of padding sizes plus 4 bytes - # per removed header. 
- size = sum(padding.length for padding in paddings) - padding = Padding() - padding.length = size + 4 * (len(paddings) - 1) - blocks.append(padding) + data = bytearray() + code = (block.code | 128) if is_last else block.code + datum = block.write() + size = len(datum) + if size > cls._MAX_SIZE: + if block._distrust_size and block._invalid_overflow_size != -1: + # The original size of this block was (1) wrong and (2) + # the real size doesn't allow us to save the file + # according to the spec (too big for 24 bit uint). Instead + # simply write back the original wrong size.. at least + # we don't make the file more "broken" as it is. + size = block._invalid_overflow_size + else: + raise error("block is too long to write") + assert not size > cls._MAX_SIZE + length = struct.pack(">I", size)[-3:] + data.append(code) + data += length + data += datum + return data + + @classmethod + def _writeblocks(cls, blocks, available, cont_size, padding_func): + """Render metadata block as a byte string.""" + + # write everything except padding + data = bytearray() + for block in blocks: + if isinstance(block, Padding): + continue + data += cls._writeblock(block) + blockssize = len(data) + + # take the padding overhead into account. we always add one + # to make things simple. + padding_block = Padding() + blockssize += len(cls._writeblock(padding_block)) + + # finally add a padding block + info = PaddingInfo(available - blockssize, cont_size) + padding_block.length = min(info._get_padding(padding_func), + cls._MAX_SIZE) + data += cls._writeblock(padding_block, is_last=True) + + return data class StreamInfo(MetadataBlock, mutagen.StreamInfo): - """FLAC stream information. + """StreamInfo() + + FLAC stream information. This contains information about the audio data in the FLAC file. Unlike most stream information objects in Mutagen, changes to this @@ -146,17 +180,18 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): attributes of this block. 
Attributes: - - * min_blocksize -- minimum audio block size - * max_blocksize -- maximum audio block size - * sample_rate -- audio sample rate in Hz - * channels -- audio channels (1 for mono, 2 for stereo) - * bits_per_sample -- bits per sample - * total_samples -- total samples in file - * length -- audio length in seconds + min_blocksize (`int`): minimum audio block size + max_blocksize (`int`): maximum audio block size + sample_rate (`int`): audio sample rate in Hz + channels (`int`): audio channels (1 for mono, 2 for stereo) + bits_per_sample (`int`): bits per sample + total_samples (`int`): total samples in file + length (`float`): audio length in seconds + bitrate (`int`): bitrate in bits per second, as an int """ code = 0 + bitrate = 0 def __eq__(self, other): try: @@ -224,11 +259,13 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): return f.getvalue() def pprint(self): - return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) + return u"FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) class SeekPoint(tuple): - """A single seek point in a FLAC file. + """SeekPoint() + + A single seek point in a FLAC file. Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL, and byte_offset and num_samples undefined. Seek points must be @@ -238,10 +275,9 @@ class SeekPoint(tuple): may be any number of them. Attributes: - - * first_sample -- sample number of first sample in the target frame - * byte_offset -- offset from first frame to target frame - * num_samples -- number of samples in target frame + first_sample (`int`): sample number of first sample in the target frame + byte_offset (`int`): offset from first frame to target frame + num_samples (`int`): number of samples in target frame """ def __new__(cls, first_sample, byte_offset, num_samples): @@ -257,8 +293,7 @@ class SeekTable(MetadataBlock): """Read and write FLAC seek tables. 
Attributes: - - * seekpoints -- list of SeekPoint objects + seekpoints: list of SeekPoint objects """ __SEEKPOINT_FORMAT = '>QQH' @@ -301,7 +336,9 @@ class SeekTable(MetadataBlock): class VCFLACDict(VCommentDict): - """Read and write FLAC Vorbis comments. + """VCFLACDict() + + Read and write FLAC Vorbis comments. FLACs don't use the framing bit at the end of the comment block. So this extends VCommentDict to not use the framing bit. @@ -318,7 +355,9 @@ class VCFLACDict(VCommentDict): class CueSheetTrackIndex(tuple): - """Index for a track in a cuesheet. + """CueSheetTrackIndex(index_number, index_offset) + + Index for a track in a cuesheet. For CD-DA, an index_number of 0 corresponds to the track pre-gap. The first index in a track must have a number of 0 or 1, @@ -327,9 +366,8 @@ class CueSheetTrackIndex(tuple): divisible by 588 samples. Attributes: - - * index_number -- index point number - * index_offset -- offset in samples from track start + index_number (`int`): index point number + index_offset (`int`): offset in samples from track start """ def __new__(cls, index_number, index_offset): @@ -341,7 +379,9 @@ class CueSheetTrackIndex(tuple): class CueSheetTrack(object): - """A track in a cuesheet. + """CueSheetTrack() + + A track in a cuesheet. For CD-DA, track_numbers must be 1-99, or 170 for the lead-out. Track_numbers must be unique within a cue sheet. There @@ -349,13 +389,13 @@ class CueSheetTrack(object): which must have none. 
Attributes: - - * track_number -- track number - * start_offset -- track offset in samples from start of FLAC stream - * isrc -- ISRC code - * type -- 0 for audio, 1 for digital data - * pre_emphasis -- true if the track is recorded with pre-emphasis - * indexes -- list of CueSheetTrackIndex objects + track_number (`int`): track number + start_offset (`int`): track offset in samples from start of FLAC stream + isrc (`text`): ISRC code, exactly 12 characters + type (`int`): 0 for audio, 1 for digital data + pre_emphasis (`bool`): true if the track is recorded with pre-emphasis + indexes (List[`mutagen.flac.CueSheetTrackIndex`]): + list of CueSheetTrackIndex objects """ def __init__(self, track_number, start_offset, isrc='', type_=0, @@ -388,19 +428,24 @@ class CueSheetTrack(object): class CueSheet(MetadataBlock): - """Read and write FLAC embedded cue sheets. + """CueSheet() + + Read and write FLAC embedded cue sheets. Number of tracks should be from 1 to 100. There should always be exactly one lead-out track and that track must be the last track in the cue sheet. 
Attributes: - - * media_catalog_number -- media catalog number in ASCII - * lead_in_samples -- number of lead-in samples - * compact_disc -- true if the cuesheet corresponds to a compact disc - * tracks -- list of CueSheetTrack objects - * lead_out -- lead-out as CueSheetTrack or None if lead-out was not found + media_catalog_number (`text`): media catalog number in ASCII, + up to 128 characters + lead_in_samples (`int`): number of lead-in samples + compact_disc (`bool`): true if the cuesheet corresponds to a + compact disc + tracks (List[`mutagen.flac.CueSheetTrack`]): + list of CueSheetTrack objects + lead_out (`mutagen.flac.CueSheetTrack` or `None`): + lead-out as CueSheetTrack or None if lead-out was not found """ __CUESHEET_FORMAT = '>128sQB258xB' @@ -439,7 +484,7 @@ class CueSheet(MetadataBlock): self.lead_in_samples = lead_in_samples self.compact_disc = bool(flags & 0x80) self.tracks = [] - for i in range(num_tracks): + for i in xrange(num_tracks): track = data.read(self.__CUESHEET_TRACK_SIZE) start_offset, track_number, isrc_padded, flags, num_indexes = \ struct.unpack(self.__CUESHEET_TRACK_FORMAT, track) @@ -448,7 +493,7 @@ class CueSheet(MetadataBlock): pre_emphasis = bool(flags & 0x40) val = CueSheetTrack( track_number, start_offset, isrc, type_, pre_emphasis) - for j in range(num_indexes): + for j in xrange(num_indexes): index = data.read(self.__CUESHEET_TRACKINDEX_SIZE) index_offset, index_number = struct.unpack( self.__CUESHEET_TRACKINDEX_FORMAT, index) @@ -490,19 +535,38 @@ class CueSheet(MetadataBlock): class Picture(MetadataBlock): - """Read and write FLAC embed pictures. + """Picture() + + Read and write FLAC embed pictures. + + .. 
currentmodule:: mutagen Attributes: + type (`id3.PictureType`): picture type + (same as types for ID3 APIC frames) + mime (`text`): MIME type of the picture + desc (`text`): picture's description + width (`int`): width in pixels + height (`int`): height in pixels + depth (`int`): color depth in bits-per-pixel + colors (`int`): number of colors for indexed palettes (like GIF), + 0 for non-indexed + data (`bytes`): picture data - * type -- picture type (same as types for ID3 APIC frames) - * mime -- MIME type of the picture - * desc -- picture's description - * width -- width in pixels - * height -- height in pixels - * depth -- color depth in bits-per-pixel - * colors -- number of colors for indexed palettes (like GIF), - 0 for non-indexed - * data -- picture data + To create a picture from file (in order to add to a FLAC file), + instantiate this object without passing anything to the constructor and + then set the properties manually:: + + p = Picture() + + with open("Folder.jpg", "rb") as f: + pic.data = f.read() + + pic.type = id3.PictureType.COVER_FRONT + pic.mime = u"image/jpeg" + pic.width = 500 + pic.height = 500 + pic.depth = 16 # color depth """ code = 6 @@ -562,12 +626,16 @@ class Picture(MetadataBlock): class Padding(MetadataBlock): - """Empty padding space for metadata blocks. + """Padding() + + Empty padding space for metadata blocks. To avoid rewriting the entire FLAC file when editing comments, metadata is often padded. Padding should occur at the end, and no - more than one padding block should be in any FLAC file. Mutagen - handles this with MetadataBlock.group_padding. + more than one padding block should be in any FLAC file. + + Attributes: + length (`int`): length """ code = 1 @@ -600,18 +668,25 @@ class Padding(MetadataBlock): class FLAC(mutagen.FileType): - """A FLAC audio file. + """FLAC(filething) + + A FLAC audio file. 
+ + Args: + filething (filething) Attributes: - - * info -- stream information (length, bitrate, sample rate) - * tags -- metadata tags, if any - * cuesheet -- CueSheet object, if any - * seektable -- SeekTable object, if any - * pictures -- list of embedded pictures + cuesheet (`CueSheet`): if any or `None` + seektable (`SeekTable`): if any or `None` + pictures (List[`Picture`]): list of embedded pictures + info (`StreamInfo`) + tags (`mutagen._vorbis.VCommentDict`) """ - _mimes = ["audio/x-flac", "application/x-flac"] + _mimes = ["audio/flac", "audio/x-flac", "application/x-flac"] + + info = None + tags = None METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict, CueSheet, Picture] @@ -640,10 +715,14 @@ class FLAC(mutagen.FileType): # so we have to too. Instead of parsing the size # given, parse an actual Vorbis comment, leaving # fileobj in the right position. - # http://code.google.com/p/mutagen/issues/detail?id=52 + # https://github.com/quodlibet/mutagen/issues/52 # ..same for the Picture block: - # http://code.google.com/p/mutagen/issues/detail?id=106 + # https://github.com/quodlibet/mutagen/issues/106 + start = fileobj.tell() block = block_type(fileobj) + real_size = fileobj.tell() - start + if real_size > MetadataBlock._MAX_SIZE: + block._invalid_overflow_size = size else: data = fileobj.read(size) block = block_type(data) @@ -677,49 +756,63 @@ class FLAC(mutagen.FileType): add_vorbiscomment = add_tags - def delete(self, filename=None): + @loadfile(writable=True) + def delete(self, filething): """Remove Vorbis comments from a file. If no filename is given, the one most recently loaded is used. 
""" - if filename is None: - filename = self.filename - for s in list(self.metadata_blocks): - if isinstance(s, VCFLACDict): - self.metadata_blocks.remove(s) - self.tags = None - self.save() - break + + if self.tags is not None: + self.metadata_blocks.remove(self.tags) + try: + self.save(filething, padding=lambda x: 0) + finally: + self.metadata_blocks.append(self.tags) + self.tags.clear() vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.") - def load(self, filename): + @convert_error(IOError, error) + @loadfile() + def load(self, filething): """Load file information from a filename.""" + fileobj = filething.fileobj + self.metadata_blocks = [] self.tags = None self.cuesheet = None self.seektable = None - self.filename = filename - fileobj = StrictFileObject(open(filename, "rb")) - try: - self.__check_header(fileobj) - while self.__read_metadata_block(fileobj): - pass - finally: - fileobj.close() + + fileobj = StrictFileObject(fileobj) + self.__check_header(fileobj, filething.name) + while self.__read_metadata_block(fileobj): + pass try: self.metadata_blocks[0].length except (AttributeError, IndexError): raise FLACNoHeaderError("Stream info block not found") + if self.info.length: + start = fileobj.tell() + fileobj.seek(0, 2) + self.info.bitrate = int( + float(fileobj.tell() - start) * 8 / self.info.length) + else: + self.info.bitrate = 0 + @property def info(self): return self.metadata_blocks[0] def add_picture(self, picture): - """Add a new picture to the file.""" + """Add a new picture to the file. 
+ + Args: + picture (Picture) + """ self.metadata_blocks.append(picture) def clear_pictures(self): @@ -730,71 +823,58 @@ class FLAC(mutagen.FileType): @property def pictures(self): - """List of embedded pictures""" + """ + Returns: + List[`Picture`]: List of embedded pictures + """ return [b for b in self.metadata_blocks if b.code == Picture.code] - def save(self, filename=None, deleteid3=False): + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, deleteid3=False, padding=None): """Save metadata blocks to a file. + Args: + filething (filething) + deleteid3 (bool): delete id3 tags while at it + padding (PaddingFunction) + If no filename is given, the one most recently loaded is used. """ - if filename is None: - filename = self.filename - f = open(filename, 'rb+') + f = StrictFileObject(filething.fileobj) + header = self.__check_header(f, filething.name) + audio_offset = self.__find_audio_offset(f) + # "fLaC" and maybe ID3 + available = audio_offset - header - try: - # Ensure we've got padding at the end, and only at the end. - # If adding makes it too large, we'll scale it down later. - self.metadata_blocks.append(Padding(b'\x00' * 1020)) - MetadataBlock.group_padding(self.metadata_blocks) + # Delete ID3v2 + if deleteid3 and header > 4: + available += header - 4 + header = 4 - header = self.__check_header(f) - # "fLaC" and maybe ID3 - available = self.__find_audio_offset(f) - header - data = MetadataBlock.writeblocks(self.metadata_blocks) + content_size = get_size(f) - audio_offset + assert content_size >= 0 + data = MetadataBlock._writeblocks( + self.metadata_blocks, available, content_size, padding) + data_size = len(data) - # Delete ID3v2 - if deleteid3 and header > 4: - available += header - 4 - header = 4 + resize_bytes(filething.fileobj, available, data_size, header) + f.seek(header - 4) + f.write(b"fLaC") + f.write(data) - if len(data) > available: - # If we have too much data, see if we can reduce padding. 
- padding = self.metadata_blocks[-1] - newlength = padding.length - (len(data) - available) - if newlength > 0: - padding.length = newlength - data = MetadataBlock.writeblocks(self.metadata_blocks) - assert len(data) == available - - elif len(data) < available: - # If we have too little data, increase padding. - self.metadata_blocks[-1].length += (available - len(data)) - data = MetadataBlock.writeblocks(self.metadata_blocks) - assert len(data) == available - - if len(data) != available: - # We couldn't reduce the padding enough. - diff = (len(data) - available) - insert_bytes(f, diff, header) - - f.seek(header - 4) - f.write(b"fLaC" + data) - - # Delete ID3v1 - if deleteid3: - try: + # Delete ID3v1 + if deleteid3: + try: + f.seek(-128, 2) + except IOError: + pass + else: + if f.read(3) == b"TAG": f.seek(-128, 2) - except IOError: - pass - else: - if f.read(3) == b"TAG": - f.seek(-128, 2) - f.truncate() - finally: - f.close() + f.truncate() def __find_audio_offset(self, fileobj): byte = 0x00 @@ -814,7 +894,12 @@ class FLAC(mutagen.FileType): fileobj.read(size) return fileobj.tell() - def __check_header(self, fileobj): + def __check_header(self, fileobj, name): + """Returns the offset of the flac block start + (skipping id3 tags if found). The passed fileobj will be advanced to + that offset as well. + """ + size = 4 header = fileobj.read(4) if header != b"fLaC": @@ -826,13 +911,24 @@ class FLAC(mutagen.FileType): size = None if size is None: raise FLACNoHeaderError( - "%r is not a valid FLAC file" % fileobj.name) + "%r is not a valid FLAC file" % name) return size Open = FLAC -def delete(filename): - """Remove tags from a file.""" - FLAC(filename).delete() +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """Remove tags from a file. 
+ + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ + + f = FLAC(filething) + filething.fileobj.seek(0) + f.delete(filething) diff --git a/lib/mutagen/id3/__init__.py b/lib/mutagen/id3/__init__.py old mode 100644 new mode 100755 index bfb4a3f6..325626f1 --- a/lib/mutagen/id3/__init__.py +++ b/lib/mutagen/id3/__init__.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # 2006 Lukas Lalinsky # 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """ID3v2 reading and writing. @@ -30,1005 +30,45 @@ Since this file's documentation is a little unwieldy, you are probably interested in the :class:`ID3` class to start with. """ -__all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete'] - -import struct -import errno - -from struct import unpack, pack, error as StructError - -import mutagen -from mutagen._util import insert_bytes, delete_bytes, DictProxy, enum -from .._compat import chr_, PY3 - -from ._util import * -from ._frames import * -from ._specs import * - - -@enum -class ID3v1SaveOptions(object): - - REMOVE = 0 - """ID3v1 tags will be removed""" - - UPDATE = 1 - """ID3v1 tags will be updated but not added""" - - CREATE = 2 - """ID3v1 tags will be created and/or updated""" - - -class ID3(DictProxy, mutagen.Metadata): - """A file with an ID3v2 tag. 
- - Attributes: - - * version -- ID3 tag version as a tuple - * unknown_frames -- raw frame data of any unknown frames found - * size -- the total size of the ID3 tag, including the header - """ - - __module__ = "mutagen.id3" - - PEDANTIC = True - version = (2, 4, 0) - """ID3 tag version as a tuple (of the loaded file)""" - - filename = None - size = 0 - __flags = 0 - __readbytes = 0 - __crc = None - __unknown_version = None - - _V24 = (2, 4, 0) - _V23 = (2, 3, 0) - _V22 = (2, 2, 0) - _V11 = (1, 1) - - def __init__(self, *args, **kwargs): - self.unknown_frames = [] - super(ID3, self).__init__(*args, **kwargs) - - def __fullread(self, size): - """ Read a certain number of bytes from the source file. """ - - try: - if size < 0: - raise ValueError('Requested bytes (%s) less than zero' % size) - if size > self.__filesize: - raise EOFError('Requested %#x of %#x (%s)' % ( - int(size), int(self.__filesize), self.filename)) - except AttributeError: - pass - data = self._fileobj.read(size) - if len(data) != size: - raise EOFError - self.__readbytes += size - return data - - def load(self, filename, known_frames=None, translate=True, v2_version=4): - """Load tags from a filename. - - Keyword arguments: - - * filename -- filename to load tag data from - * known_frames -- dict mapping frame IDs to Frame objects - * translate -- Update all tags to ID3v2.3/4 internally. If you - intend to save, this must be true or you have to - call update_to_v23() / update_to_v24() manually. - * v2_version -- if update_to_v23 or update_to_v24 get called (3 or 4) - - Example of loading a custom frame:: - - my_frames = dict(mutagen.id3.Frames) - class XMYF(Frame): ... 
- my_frames["XMYF"] = XMYF - mutagen.id3.ID3(filename, known_frames=my_frames) - """ - - if v2_version not in (3, 4): - raise ValueError("Only 3 and 4 possible for v2_version") - - from os.path import getsize - - self.filename = filename - self.__known_frames = known_frames - self._fileobj = open(filename, 'rb') - self.__filesize = getsize(filename) - try: - try: - self._load_header() - except EOFError: - self.size = 0 - raise ID3NoHeaderError("%s: too small (%d bytes)" % ( - filename, self.__filesize)) - except (ID3NoHeaderError, ID3UnsupportedVersionError): - self.size = 0 - frames, offset = _find_id3v1(self._fileobj) - if frames is None: - raise - - self.version = self._V11 - for v in frames.values(): - self.add(v) - else: - frames = self.__known_frames - if frames is None: - if self._V23 <= self.version: - frames = Frames - elif self._V22 <= self.version: - frames = Frames_2_2 - data = self.__fullread(self.size - 10) - for frame in self.__read_frames(data, frames=frames): - if isinstance(frame, Frame): - self.add(frame) - else: - self.unknown_frames.append(frame) - self.__unknown_version = self.version[:2] - finally: - self._fileobj.close() - del self._fileobj - del self.__filesize - if translate: - if v2_version == 3: - self.update_to_v23() - else: - self.update_to_v24() - - def getall(self, key): - """Return all frames with a given name (the list may be empty). - - This is best explained by examples:: - - id3.getall('TIT2') == [id3['TIT2']] - id3.getall('TTTT') == [] - id3.getall('TXXX') == [TXXX(desc='woo', text='bar'), - TXXX(desc='baz', text='quuuux'), ...] - - Since this is based on the frame's HashKey, which is - colon-separated, you can use it to do things like - ``getall('COMM:MusicMatch')`` or ``getall('TXXX:QuodLibet:')``. 
- """ - if key in self: - return [self[key]] - else: - key = key + ":" - return [v for s, v in self.items() if s.startswith(key)] - - def delall(self, key): - """Delete all tags of a given kind; see getall.""" - if key in self: - del(self[key]) - else: - key = key + ":" - for k in list(self.keys()): - if k.startswith(key): - del(self[k]) - - def setall(self, key, values): - """Delete frames of the given type and add frames in 'values'.""" - self.delall(key) - for tag in values: - self[tag.HashKey] = tag - - def pprint(self): - """Return tags in a human-readable format. - - "Human-readable" is used loosely here. The format is intended - to mirror that used for Vorbis or APEv2 output, e.g. - - ``TIT2=My Title`` - - However, ID3 frames can have multiple keys: - - ``POPM=user@example.org=3 128/255`` - """ - frames = sorted(Frame.pprint(s) for s in self.values()) - return "\n".join(frames) - - def loaded_frame(self, tag): - """Deprecated; use the add method.""" - # turn 2.2 into 2.3/2.4 tags - if len(type(tag).__name__) == 3: - tag = type(tag).__base__(tag) - self[tag.HashKey] = tag - - # add = loaded_frame (and vice versa) break applications that - # expect to be able to override loaded_frame (e.g. Quod Libet), - # as does making loaded_frame call add. 
- def add(self, frame): - """Add a frame to the tag.""" - return self.loaded_frame(frame) - - def _load_header(self): - fn = self.filename - data = self.__fullread(10) - id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data) - self.__flags = flags - self.size = BitPaddedInt(size) + 10 - self.version = (2, vmaj, vrev) - - if id3 != b'ID3': - raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn) - if vmaj not in [2, 3, 4]: - raise ID3UnsupportedVersionError("%r ID3v2.%d not supported" - % (fn, vmaj)) - - if self.PEDANTIC: - if not BitPaddedInt.has_valid_padding(size): - raise ValueError("Header size not synchsafe") - - if (self._V24 <= self.version) and (flags & 0x0f): - raise ValueError("%r has invalid flags %#02x" % (fn, flags)) - elif (self._V23 <= self.version < self._V24) and (flags & 0x1f): - raise ValueError("%r has invalid flags %#02x" % (fn, flags)) - - if self.f_extended: - extsize = self.__fullread(4) - frame_id = extsize.decode("ascii", "replace") if PY3 else extsize - if frame_id in Frames: - # Some tagger sets the extended header flag but - # doesn't write an extended header; in this case, the - # ID3 data follows immediately. Since no extended - # header is going to be long enough to actually match - # a frame, and if it's *not* a frame we're going to be - # completely lost anyway, this seems to be the most - # correct check. - # http://code.google.com/p/quodlibet/issues/detail?id=126 - self.__flags ^= 0x40 - self.__extsize = 0 - self._fileobj.seek(-4, 1) - self.__readbytes -= 4 - elif self.version >= self._V24: - # "Where the 'Extended header size' is the size of the whole - # extended header, stored as a 32 bit synchsafe integer." - self.__extsize = BitPaddedInt(extsize) - 4 - if self.PEDANTIC: - if not BitPaddedInt.has_valid_padding(extsize): - raise ValueError("Extended header size not synchsafe") - else: - # "Where the 'Extended header size', currently 6 or 10 bytes, - # excludes itself." 
- self.__extsize = unpack('>L', extsize)[0] - if self.__extsize: - self.__extdata = self.__fullread(self.__extsize) - else: - self.__extdata = b"" - - def __determine_bpi(self, data, frames): - if self.version < self._V24: - return int - - return _determine_bpi(data, frames) - - def __read_frames(self, data, frames): - if self.version < self._V24 and self.f_unsynch: - try: - data = unsynch.decode(data) - except ValueError: - pass - - if self._V23 <= self.version: - bpi = self.__determine_bpi(data, frames) - while data: - header = data[:10] - try: - name, size, flags = unpack('>4sLH', header) - except struct.error: - return # not enough header - if name.strip(b'\x00') == b'': - return - - size = bpi(size) - framedata = data[10:10 + size] - data = data[10 + size:] - if size == 0: - continue # drop empty frames - - if PY3: - try: - name = name.decode('ascii') - except UnicodeDecodeError: - continue - - try: - # someone writes 2.3 frames with 2.2 names - if name[-1] == "\x00": - tag = Frames_2_2[name[:-1]] - name = tag.__base__.__name__ - - tag = frames[name] - except KeyError: - if is_valid_frame_id(name): - yield header + framedata - else: - try: - yield self.__load_framedata(tag, flags, framedata) - except NotImplementedError: - yield header + framedata - except ID3JunkFrameError: - pass - - elif self._V22 <= self.version: - while data: - header = data[0:6] - try: - name, size = unpack('>3s3s', header) - except struct.error: - return # not enough header - size, = struct.unpack('>L', b'\x00' + size) - if name.strip(b'\x00') == b'': - return - - framedata = data[6:6 + size] - data = data[6 + size:] - if size == 0: - continue # drop empty frames - - if PY3: - try: - name = name.decode('ascii') - except UnicodeDecodeError: - continue - - try: - tag = frames[name] - except KeyError: - if is_valid_frame_id(name): - yield header + framedata - else: - try: - yield self.__load_framedata(tag, 0, framedata) - except NotImplementedError: - yield header + framedata - except 
ID3JunkFrameError: - pass - - def __load_framedata(self, tag, flags, framedata): - return tag.fromData(self, flags, framedata) - - f_unsynch = property(lambda s: bool(s.__flags & 0x80)) - f_extended = property(lambda s: bool(s.__flags & 0x40)) - f_experimental = property(lambda s: bool(s.__flags & 0x20)) - f_footer = property(lambda s: bool(s.__flags & 0x10)) - - # f_crc = property(lambda s: bool(s.__extflags & 0x8000)) - - def _prepare_framedata(self, v2_version, v23_sep): - if v2_version == 3: - version = self._V23 - elif v2_version == 4: - version = self._V24 - else: - raise ValueError("Only 3 or 4 allowed for v2_version") - - # Sort frames by 'importance' - order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"] - order = dict((b, a) for a, b in enumerate(order)) - last = len(order) - frames = sorted(self.items(), - key=lambda a: (order.get(a[0][:4], last), a[0])) - - framedata = [self.__save_frame(frame, version=version, v23_sep=v23_sep) - for (key, frame) in frames] - - # only write unknown frames if they were loaded from the version - # we are saving with or upgraded to it - if self.__unknown_version == version[:2]: - framedata.extend(data for data in self.unknown_frames - if len(data) > 10) - - return b''.join(framedata) - - def _prepare_id3_header(self, original_header, framesize, v2_version): - try: - id3, vmaj, vrev, flags, insize = \ - unpack('>3sBBB4s', original_header) - except struct.error: - id3, insize = b'', 0 - insize = BitPaddedInt(insize) - if id3 != b'ID3': - insize = -10 - - if insize >= framesize: - outsize = insize - else: - outsize = (framesize + 1023) & ~0x3FF - - framesize = BitPaddedInt.to_str(outsize, width=4) - header = pack('>3sBBB4s', b'ID3', v2_version, 0, 0, framesize) - - return (header, outsize, insize) - - def save(self, filename=None, v1=1, v2_version=4, v23_sep='/'): - """Save changes to a file. - - Args: - filename: - Filename to save the tag to. If no filename is given, - the one most recently loaded is used. 
- v1 (ID3v1SaveOptions): - if 0, ID3v1 tags will be removed. - if 1, ID3v1 tags will be updated but not added. - if 2, ID3v1 tags will be created and/or updated - v2 (int): - version of ID3v2 tags (3 or 4). - v23_sep (str): - the separator used to join multiple text values - if v2_version == 3. Defaults to '/' but if it's None - will be the ID3v2v2.4 null separator. - - By default Mutagen saves ID3v2.4 tags. If you want to save ID3v2.3 - tags, you must call method update_to_v23 before saving the file. - - The lack of a way to update only an ID3v1 tag is intentional. - """ - - framedata = self._prepare_framedata(v2_version, v23_sep) - framesize = len(framedata) - - if not framedata: - try: - self.delete(filename) - except EnvironmentError as err: - from errno import ENOENT - if err.errno != ENOENT: - raise - return - - if filename is None: - filename = self.filename - try: - f = open(filename, 'rb+') - except IOError as err: - from errno import ENOENT - if err.errno != ENOENT: - raise - f = open(filename, 'ab') # create, then reopen - f = open(filename, 'rb+') - try: - idata = f.read(10) - - header = self._prepare_id3_header(idata, framesize, v2_version) - header, outsize, insize = header - - data = header + framedata + (b'\x00' * (outsize - framesize)) - - if (insize < outsize): - insert_bytes(f, outsize - insize, insize + 10) - f.seek(0) - f.write(data) - - self.__save_v1(f, v1) - - finally: - f.close() - - def __save_v1(self, f, v1): - tag, offset = _find_id3v1(f) - has_v1 = tag is not None - - f.seek(offset, 2) - if v1 == ID3v1SaveOptions.UPDATE and has_v1 or \ - v1 == ID3v1SaveOptions.CREATE: - f.write(MakeID3v1(self)) - else: - f.truncate() - - def delete(self, filename=None, delete_v1=True, delete_v2=True): - """Remove tags from a file. - - If no filename is given, the one most recently loaded is used. 
- - Keyword arguments: - - * delete_v1 -- delete any ID3v1 tag - * delete_v2 -- delete any ID3v2 tag - """ - if filename is None: - filename = self.filename - delete(filename, delete_v1, delete_v2) - self.clear() - - def __save_frame(self, frame, name=None, version=_V24, v23_sep=None): - flags = 0 - if self.PEDANTIC and isinstance(frame, TextFrame): - if len(str(frame)) == 0: - return b'' - - if version == self._V23: - framev23 = frame._get_v23_frame(sep=v23_sep) - framedata = framev23._writeData() - else: - framedata = frame._writeData() - - usize = len(framedata) - if usize > 2048: - # Disabled as this causes iTunes and other programs - # to fail to find these frames, which usually includes - # e.g. APIC. - # framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib') - # flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN - pass - - if version == self._V24: - bits = 7 - elif version == self._V23: - bits = 8 - else: - raise ValueError - - datasize = BitPaddedInt.to_str(len(framedata), width=4, bits=bits) - - if name is not None: - assert isinstance(name, bytes) - frame_name = name - else: - frame_name = type(frame).__name__ - if PY3: - frame_name = frame_name.encode("ascii") - - header = pack('>4s4sH', frame_name, datasize, flags) - return header + framedata - - def __update_common(self): - """Updates done by both v23 and v24 update""" - - if "TCON" in self: - # Get rid of "(xx)Foobr" format. - self["TCON"].genres = self["TCON"].genres - - # ID3v2.2 LNK frames are just way too different to upgrade. - for frame in self.getall("LINK"): - if len(frame.frameid) != 4: - del self[frame.HashKey] - - mimes = {"PNG": "image/png", "JPG": "image/jpeg"} - for pic in self.getall("APIC"): - if pic.mime in mimes: - newpic = APIC( - encoding=pic.encoding, mime=mimes[pic.mime], - type=pic.type, desc=pic.desc, data=pic.data) - self.add(newpic) - - def update_to_v24(self): - """Convert older tags into an ID3v2.4 tag. - - This updates old ID3v2 frames to ID3v2.4 ones (e.g. 
TYER to - TDRC). If you intend to save tags, you must call this function - at some point; it is called by default when loading the tag. - """ - - self.__update_common() - - if self.__unknown_version == (2, 3): - # convert unknown 2.3 frames (flags/size) to 2.4 - converted = [] - for frame in self.unknown_frames: - try: - name, size, flags = unpack('>4sLH', frame[:10]) - frame = BinaryFrame.fromData(self, flags, frame[10:]) - except (struct.error, error): - continue - - converted.append(self.__save_frame(frame, name=name)) - self.unknown_frames[:] = converted - self.__unknown_version = (2, 4) - - # TDAT, TYER, and TIME have been turned into TDRC. - try: - date = text_type(self.get("TYER", "")) - if date.strip(u"\x00"): - self.pop("TYER") - dat = text_type(self.get("TDAT", "")) - if dat.strip("\x00"): - self.pop("TDAT") - date = "%s-%s-%s" % (date, dat[2:], dat[:2]) - time = text_type(self.get("TIME", "")) - if time.strip("\x00"): - self.pop("TIME") - date += "T%s:%s:00" % (time[:2], time[2:]) - if "TDRC" not in self: - self.add(TDRC(encoding=0, text=date)) - except UnicodeDecodeError: - # Old ID3 tags have *lots* of Unicode problems, so if TYER - # is bad, just chuck the frames. - pass - - # TORY can be the first part of a TDOR. - if "TORY" in self: - f = self.pop("TORY") - if "TDOR" not in self: - try: - self.add(TDOR(encoding=0, text=str(f))) - except UnicodeDecodeError: - pass - - # IPLS is now TIPL. - if "IPLS" in self: - f = self.pop("IPLS") - if "TIPL" not in self: - self.add(TIPL(encoding=f.encoding, people=f.people)) - - # These can't be trivially translated to any ID3v2.4 tags, or - # should have been removed already. - for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME", "CRM"]: - if key in self: - del(self[key]) - - def update_to_v23(self): - """Convert older (and newer) tags into an ID3v2.3 tag. - - This updates incompatible ID3v2 frames to ID3v2.3 ones. If you - intend to save tags as ID3v2.3, you must call this function - at some point. 
- - If you want to to go off spec and include some v2.4 frames - in v2.3, remove them before calling this and add them back afterwards. - """ - - self.__update_common() - - # we could downgrade unknown v2.4 frames here, but given that - # the main reason to save v2.3 is compatibility and this - # might increase the chance of some parser breaking.. better not - - # TMCL, TIPL -> TIPL - if "TIPL" in self or "TMCL" in self: - people = [] - if "TIPL" in self: - f = self.pop("TIPL") - people.extend(f.people) - if "TMCL" in self: - f = self.pop("TMCL") - people.extend(f.people) - if "IPLS" not in self: - self.add(IPLS(encoding=f.encoding, people=people)) - - # TDOR -> TORY - if "TDOR" in self: - f = self.pop("TDOR") - if f.text: - d = f.text[0] - if d.year and "TORY" not in self: - self.add(TORY(encoding=f.encoding, text="%04d" % d.year)) - - # TDRC -> TYER, TDAT, TIME - if "TDRC" in self: - f = self.pop("TDRC") - if f.text: - d = f.text[0] - if d.year and "TYER" not in self: - self.add(TYER(encoding=f.encoding, text="%04d" % d.year)) - if d.month and d.day and "TDAT" not in self: - self.add(TDAT(encoding=f.encoding, - text="%02d%02d" % (d.day, d.month))) - if d.hour and d.minute and "TIME" not in self: - self.add(TIME(encoding=f.encoding, - text="%02d%02d" % (d.hour, d.minute))) - - # New frames added in v2.4 - v24_frames = [ - 'ASPI', 'EQU2', 'RVA2', 'SEEK', 'SIGN', 'TDEN', 'TDOR', - 'TDRC', 'TDRL', 'TDTG', 'TIPL', 'TMCL', 'TMOO', 'TPRO', - 'TSOA', 'TSOP', 'TSOT', 'TSST', - ] - - for key in v24_frames: - if key in self: - del(self[key]) - - -def delete(filename, delete_v1=True, delete_v2=True): - """Remove tags from a file. 
- - Keyword arguments: - - * delete_v1 -- delete any ID3v1 tag - * delete_v2 -- delete any ID3v2 tag - """ - - f = open(filename, 'rb+') - - if delete_v1: - tag, offset = _find_id3v1(f) - if tag is not None: - f.seek(offset, 2) - f.truncate() - - # technically an insize=0 tag is invalid, but we delete it anyway - # (primarily because we used to write it) - if delete_v2: - f.seek(0, 0) - idata = f.read(10) - try: - id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata) - except struct.error: - id3, insize = b'', -1 - insize = BitPaddedInt(insize) - if id3 == b'ID3' and insize >= 0: - delete_bytes(f, insize + 10, 0) - +from ._file import ID3, ID3FileType, delete, ID3v1SaveOptions +from ._specs import Encoding, PictureType, CTOCFlags, ID3TimeStamp +from ._frames import Frames, Frames_2_2, Frame, TextFrame, UrlFrame, \ + UrlFrameU, TimeStampTextFrame, BinaryFrame, NumericPartTextFrame, \ + NumericTextFrame, PairedTextFrame +from ._util import ID3NoHeaderError, error, ID3UnsupportedVersionError +from ._id3v1 import ParseID3v1, MakeID3v1 +from ._tags import ID3Tags + +# deprecated +from ._util import ID3EncryptionUnsupportedError, ID3JunkFrameError, \ + ID3BadUnsynchData, ID3BadCompressedData, ID3TagError, ID3Warning, \ + BitPaddedInt as _BitPaddedIntForPicard + + +for f in Frames: + globals()[f] = Frames[f] +for f in Frames_2_2: + globals()[f] = Frames_2_2[f] # support open(filename) as interface Open = ID3 - -def _determine_bpi(data, frames, EMPTY=b"\x00" * 10): - """Takes id3v2.4 frame data and determines if ints or bitpaddedints - should be used for parsing. Needed because iTunes used to write - normal ints for frame sizes. 
- """ - - # count number of tags found as BitPaddedInt and how far past - o = 0 - asbpi = 0 - while o < len(data) - 10: - part = data[o:o + 10] - if part == EMPTY: - bpioff = -((len(data) - o) % 10) - break - name, size, flags = unpack('>4sLH', part) - size = BitPaddedInt(size) - o += 10 + size - if PY3: - try: - name = name.decode("ascii") - except UnicodeDecodeError: - continue - if name in frames: - asbpi += 1 - else: - bpioff = o - len(data) - - # count number of tags found as int and how far past - o = 0 - asint = 0 - while o < len(data) - 10: - part = data[o:o + 10] - if part == EMPTY: - intoff = -((len(data) - o) % 10) - break - name, size, flags = unpack('>4sLH', part) - o += 10 + size - if PY3: - try: - name = name.decode("ascii") - except UnicodeDecodeError: - continue - if name in frames: - asint += 1 - else: - intoff = o - len(data) - - # if more tags as int, or equal and bpi is past and int is not - if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)): - return int - return BitPaddedInt +# pyflakes +ID3, ID3FileType, delete, ID3v1SaveOptions, Encoding, PictureType, CTOCFlags, +ID3TimeStamp, Frames, Frames_2_2, Frame, TextFrame, UrlFrame, UrlFrameU, +TimeStampTextFrame, BinaryFrame, NumericPartTextFrame, NumericTextFrame, +PairedTextFrame, ID3NoHeaderError, error, ID3UnsupportedVersionError, +ParseID3v1, MakeID3v1, ID3Tags, ID3EncryptionUnsupportedError, +ID3JunkFrameError, ID3BadUnsynchData, ID3BadCompressedData, ID3TagError, +ID3Warning -def _find_id3v1(fileobj): - """Returns a tuple of (id3tag, offset_to_end) or (None, 0) +# Workaround for http://tickets.musicbrainz.org/browse/PICARD-833 +class _DummySpecForPicard(object): + write = None - offset mainly because we used to write too short tags in some cases and - we need the offset to delete them. 
- """ - - # id3v1 is always at the end (after apev2) - - extra_read = b"APETAGEX".index(b"TAG") - - try: - fileobj.seek(-128 - extra_read, 2) - except IOError as e: - if e.errno == errno.EINVAL: - # If the file is too small, might be ok since we wrote too small - # tags at some point. let's see how the parsing goes.. - fileobj.seek(0, 0) - else: - raise - - data = fileobj.read(128 + extra_read) - try: - idx = data.index(b"TAG") - except ValueError: - return (None, 0) - else: - # FIXME: make use of the apev2 parser here - # if TAG is part of APETAGEX assume this is an APEv2 tag - try: - ape_idx = data.index(b"APETAGEX") - except ValueError: - pass - else: - if idx == ape_idx + extra_read: - return (None, 0) - - tag = ParseID3v1(data[idx:]) - if tag is None: - return (None, 0) - - offset = idx - len(data) - return (tag, offset) +EncodedTextSpec = MultiSpec = _DummySpecForPicard +BitPaddedInt = _BitPaddedIntForPicard -# ID3v1.1 support. -def ParseID3v1(data): - """Parse an ID3v1 tag, returning a list of ID3v2.4 frames. - - Returns a {frame_name: frame} dict or None. - """ - - try: - data = data[data.index(b"TAG"):] - except ValueError: - return None - if 128 < len(data) or len(data) < 124: - return None - - # Issue #69 - Previous versions of Mutagen, when encountering - # out-of-spec TDRC and TYER frames of less than four characters, - # wrote only the characters available - e.g. "1" or "" - into the - # year field. To parse those, reduce the size of the year field. - # Amazingly, "0s" works as a struct format string. 
- unpack_fmt = "3s30s30s30s%ds29sBB" % (len(data) - 124) - - try: - tag, title, artist, album, year, comment, track, genre = unpack( - unpack_fmt, data) - except StructError: - return None - - if tag != b"TAG": - return None - - def fix(data): - return data.split(b"\x00")[0].strip().decode('latin1') - - title, artist, album, year, comment = map( - fix, [title, artist, album, year, comment]) - - frames = {} - if title: - frames["TIT2"] = TIT2(encoding=0, text=title) - if artist: - frames["TPE1"] = TPE1(encoding=0, text=[artist]) - if album: - frames["TALB"] = TALB(encoding=0, text=album) - if year: - frames["TDRC"] = TDRC(encoding=0, text=year) - if comment: - frames["COMM"] = COMM( - encoding=0, lang="eng", desc="ID3v1 Comment", text=comment) - # Don't read a track number if it looks like the comment was - # padded with spaces instead of nulls (thanks, WinAmp). - if track and ((track != 32) or (data[-3] == b'\x00'[0])): - frames["TRCK"] = TRCK(encoding=0, text=str(track)) - if genre != 255: - frames["TCON"] = TCON(encoding=0, text=str(genre)) - return frames - - -def MakeID3v1(id3): - """Return an ID3v1.1 tag string from a dict of ID3v2.4 frames.""" - - v1 = {} - - for v2id, name in {"TIT2": "title", "TPE1": "artist", - "TALB": "album"}.items(): - if v2id in id3: - text = id3[v2id].text[0].encode('latin1', 'replace')[:30] - else: - text = b"" - v1[name] = text + (b"\x00" * (30 - len(text))) - - if "COMM" in id3: - cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28] - else: - cmnt = b"" - v1["comment"] = cmnt + (b"\x00" * (29 - len(cmnt))) - - if "TRCK" in id3: - try: - v1["track"] = chr_(+id3["TRCK"]) - except ValueError: - v1["track"] = b"\x00" - else: - v1["track"] = b"\x00" - - if "TCON" in id3: - try: - genre = id3["TCON"].genres[0] - except IndexError: - pass - else: - if genre in TCON.GENRES: - v1["genre"] = chr_(TCON.GENRES.index(genre)) - if "genre" not in v1: - v1["genre"] = b"\xff" - - if "TDRC" in id3: - year = 
text_type(id3["TDRC"]).encode('ascii') - elif "TYER" in id3: - year = text_type(id3["TYER"]).encode('ascii') - else: - year = b"" - v1["year"] = (year + b"\x00\x00\x00\x00")[:4] - - return ( - b"TAG" + - v1["title"] + - v1["artist"] + - v1["album"] + - v1["year"] + - v1["comment"] + - v1["track"] + - v1["genre"] - ) - - -class ID3FileType(mutagen.FileType): - """An unknown type of file with ID3 tags.""" - - ID3 = ID3 - - class _Info(mutagen.StreamInfo): - length = 0 - - def __init__(self, fileobj, offset): - pass - - @staticmethod - def pprint(): - return "Unknown format with ID3 tag" - - @staticmethod - def score(filename, fileobj, header_data): - return header_data.startswith(b"ID3") - - def add_tags(self, ID3=None): - """Add an empty ID3 tag to the file. - - A custom tag reader may be used in instead of the default - mutagen.id3.ID3 object, e.g. an EasyID3 reader. - """ - if ID3 is None: - ID3 = self.ID3 - if self.tags is None: - self.ID3 = ID3 - self.tags = ID3() - else: - raise error("an ID3 tag already exists") - - def load(self, filename, ID3=None, **kwargs): - """Load stream and tag information from a file. - - A custom tag reader may be used in instead of the default - mutagen.id3.ID3 object, e.g. an EasyID3 reader. - """ - - if ID3 is None: - ID3 = self.ID3 - else: - # If this was initialized with EasyID3, remember that for - # when tags are auto-instantiated in add_tags. 
- self.ID3 = ID3 - self.filename = filename - try: - self.tags = ID3(filename, **kwargs) - except error: - self.tags = None - if self.tags is not None: - try: - offset = self.tags.size - except AttributeError: - offset = None - else: - offset = None - try: - fileobj = open(filename, "rb") - self.info = self._Info(fileobj, offset) - finally: - fileobj.close() +__all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete'] diff --git a/lib/mutagen/id3/_file.py b/lib/mutagen/id3/_file.py new file mode 100755 index 00000000..69c3a3f0 --- /dev/null +++ b/lib/mutagen/id3/_file.py @@ -0,0 +1,406 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2005 Michael Urman +# 2006 Lukas Lalinsky +# 2013 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import struct + +import mutagen +from mutagen._util import insert_bytes, delete_bytes, enum, \ + loadfile, convert_error, read_full +from mutagen._tags import PaddingInfo + +from ._util import error, ID3NoHeaderError, ID3UnsupportedVersionError, \ + BitPaddedInt +from ._tags import ID3Tags, ID3Header, ID3SaveConfig +from ._id3v1 import MakeID3v1, find_id3v1 + + +@enum +class ID3v1SaveOptions(object): + + REMOVE = 0 + """ID3v1 tags will be removed""" + + UPDATE = 1 + """ID3v1 tags will be updated but not added""" + + CREATE = 2 + """ID3v1 tags will be created and/or updated""" + + +class ID3(ID3Tags, mutagen.Metadata): + """ID3(filething=None) + + A file with an ID3v2 tag. + + If any arguments are given, the :meth:`load` is called with them. If no + arguments are given then an empty `ID3` object is created. 
+ + :: + + ID3("foo.mp3") + # same as + t = ID3() + t.load("foo.mp3") + + Arguments: + filething (filething): or `None` + + Attributes: + version (Tuple[int]): ID3 tag version as a tuple + unknown_frames (List[bytes]): raw frame data of any unknown frames + found + size (int): the total size of the ID3 tag, including the header + """ + + __module__ = "mutagen.id3" + + PEDANTIC = True + """`bool`: + + .. deprecated:: 1.28 + + Doesn't have any effect + """ + + filename = None + + def __init__(self, *args, **kwargs): + self._header = None + self._version = (2, 4, 0) + super(ID3, self).__init__(*args, **kwargs) + + @property + def version(self): + """`tuple`: ID3 tag version as a tuple (of the loaded file)""" + + if self._header is not None: + return self._header.version + return self._version + + @version.setter + def version(self, value): + self._version = value + + @property + def f_unsynch(self): + if self._header is not None: + return self._header.f_unsynch + return False + + @property + def f_extended(self): + if self._header is not None: + return self._header.f_extended + return False + + @property + def size(self): + if self._header is not None: + return self._header.size + return 0 + + def _pre_load_header(self, fileobj): + # XXX: for aiff to adjust the offset.. + pass + + @convert_error(IOError, error) + @loadfile() + def load(self, filething, known_frames=None, translate=True, v2_version=4): + """load(filething, known_frames=None, translate=True, v2_version=4) + + Load tags from a filename. + + Args: + filename (filething): filename or file object to load tag data from + known_frames (Dict[`mutagen.text`, `Frame`]): dict mapping frame + IDs to Frame objects + translate (bool): Update all tags to ID3v2.3/4 internally. If you + intend to save, this must be true or you have to + call update_to_v23() / update_to_v24() manually. 
+ v2_version (int): if update_to_v23 or update_to_v24 get called + (3 or 4) + + Example of loading a custom frame:: + + my_frames = dict(mutagen.id3.Frames) + class XMYF(Frame): ... + my_frames["XMYF"] = XMYF + mutagen.id3.ID3(filename, known_frames=my_frames) + """ + + fileobj = filething.fileobj + + if v2_version not in (3, 4): + raise ValueError("Only 3 and 4 possible for v2_version") + + self.unknown_frames = [] + self._header = None + self._padding = 0 + + self._pre_load_header(fileobj) + + try: + self._header = ID3Header(fileobj) + except (ID3NoHeaderError, ID3UnsupportedVersionError): + frames, offset = find_id3v1(fileobj) + if frames is None: + raise + + self.version = ID3Header._V11 + for v in frames.values(): + self.add(v) + else: + # XXX: attach to the header object so we have it in spec parsing.. + if known_frames is not None: + self._header._known_frames = known_frames + + data = read_full(fileobj, self.size - 10) + remaining_data = self._read(self._header, data) + self._padding = len(remaining_data) + + if translate: + if v2_version == 3: + self.update_to_v23() + else: + self.update_to_v24() + + def _prepare_data(self, fileobj, start, available, v2_version, v23_sep, + pad_func): + + if v2_version not in (3, 4): + raise ValueError("Only 3 or 4 allowed for v2_version") + + config = ID3SaveConfig(v2_version, v23_sep) + framedata = self._write(config) + + needed = len(framedata) + 10 + + fileobj.seek(0, 2) + trailing_size = fileobj.tell() - start + + info = PaddingInfo(available - needed, trailing_size) + new_padding = info._get_padding(pad_func) + if new_padding < 0: + raise error("invalid padding") + new_size = needed + new_padding + + new_framesize = BitPaddedInt.to_str(new_size - 10, width=4) + header = struct.pack( + '>3sBBB4s', b'ID3', v2_version, 0, 0, new_framesize) + + data = header + framedata + assert new_size >= len(data) + data += (new_size - len(data)) * b'\x00' + assert new_size == len(data) + + return data + + @convert_error(IOError, 
error) + @loadfile(writable=True, create=True) + def save(self, filething, v1=1, v2_version=4, v23_sep='/', padding=None): + """save(filething=None, v1=1, v2_version=4, v23_sep='/', padding=None) + + Save changes to a file. + + Args: + filename (fspath): + Filename to save the tag to. If no filename is given, + the one most recently loaded is used. + v1 (ID3v1SaveOptions): + if 0, ID3v1 tags will be removed. + if 1, ID3v1 tags will be updated but not added. + if 2, ID3v1 tags will be created and/or updated + v2 (int): + version of ID3v2 tags (3 or 4). + v23_sep (text): + the separator used to join multiple text values + if v2_version == 3. Defaults to '/' but if it's None + will be the ID3v2v2.4 null separator. + padding (PaddingFunction) + + Raises: + mutagen.MutagenError + + By default Mutagen saves ID3v2.4 tags. If you want to save ID3v2.3 + tags, you must call method update_to_v23 before saving the file. + + The lack of a way to update only an ID3v1 tag is intentional. + """ + + f = filething.fileobj + + try: + header = ID3Header(filething.fileobj) + except ID3NoHeaderError: + old_size = 0 + else: + old_size = header.size + + data = self._prepare_data( + f, 0, old_size, v2_version, v23_sep, padding) + new_size = len(data) + + if (old_size < new_size): + insert_bytes(f, new_size - old_size, old_size) + elif (old_size > new_size): + delete_bytes(f, old_size - new_size, new_size) + f.seek(0) + f.write(data) + + self.__save_v1(f, v1) + + def __save_v1(self, f, v1): + tag, offset = find_id3v1(f) + has_v1 = tag is not None + + f.seek(offset, 2) + if v1 == ID3v1SaveOptions.UPDATE and has_v1 or \ + v1 == ID3v1SaveOptions.CREATE: + f.write(MakeID3v1(self)) + else: + f.truncate() + + @loadfile(writable=True) + def delete(self, filething, delete_v1=True, delete_v2=True): + """delete(filething=None, delete_v1=True, delete_v2=True) + + Remove tags from a file. + + Args: + filething (filething): A filename or `None` to use the one used + when loading. 
+ delete_v1 (bool): delete any ID3v1 tag + delete_v2 (bool): delete any ID3v2 tag + + If no filename is given, the one most recently loaded is used. + """ + + delete(filething, delete_v1, delete_v2) + self.clear() + + +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething, delete_v1=True, delete_v2=True): + """Remove tags from a file. + + Args: + delete_v1 (bool): delete any ID3v1 tag + delete_v2 (bool): delete any ID3v2 tag + + Raises: + mutagen.MutagenError: In case deleting failed + """ + + f = filething.fileobj + + if delete_v1: + tag, offset = find_id3v1(f) + if tag is not None: + f.seek(offset, 2) + f.truncate() + + # technically an insize=0 tag is invalid, but we delete it anyway + # (primarily because we used to write it) + if delete_v2: + f.seek(0, 0) + idata = f.read(10) + try: + id3, vmaj, vrev, flags, insize = struct.unpack('>3sBBB4s', idata) + except struct.error: + pass + else: + insize = BitPaddedInt(insize) + if id3 == b'ID3' and insize >= 0: + delete_bytes(f, insize + 10, 0) + + +class ID3FileType(mutagen.FileType): + """ID3FileType(filething, ID3=None, **kwargs) + + An unknown type of file with ID3 tags. + + Args: + filething (filething): A filename or file-like object + ID3 (ID3): An ID3 subclass to use for tags. + + Raises: + mutagen.MutagenError: In case loading the file failed + + Load stream and tag information from a file. + + A custom tag reader may be used in instead of the default + mutagen.id3.ID3 object, e.g. an EasyID3 reader. + """ + + __module__ = "mutagen.id3" + + ID3 = ID3 + + class _Info(mutagen.StreamInfo): + length = 0 + + def __init__(self, fileobj, offset): + pass + + @staticmethod + def pprint(): + return u"Unknown format with ID3 tag" + + @staticmethod + def score(filename, fileobj, header_data): + return header_data.startswith(b"ID3") + + def add_tags(self, ID3=None): + """Add an empty ID3 tag to the file. 
+ + Args: + ID3 (ID3): An ID3 subclass to use or `None` to use the one + that used when loading. + + A custom tag reader may be used in instead of the default + `ID3` object, e.g. an `mutagen.easyid3.EasyID3` reader. + """ + + if ID3 is None: + ID3 = self.ID3 + if self.tags is None: + self.ID3 = ID3 + self.tags = ID3() + else: + raise error("an ID3 tag already exists") + + @loadfile() + def load(self, filething, ID3=None, **kwargs): + # see __init__ for docs + + fileobj = filething.fileobj + + if ID3 is None: + ID3 = self.ID3 + else: + # If this was initialized with EasyID3, remember that for + # when tags are auto-instantiated in add_tags. + self.ID3 = ID3 + + try: + self.tags = ID3(fileobj, **kwargs) + except ID3NoHeaderError: + self.tags = None + + if self.tags is not None: + try: + offset = self.tags.size + except AttributeError: + offset = None + else: + offset = None + + self.info = self._Info(fileobj, offset) diff --git a/lib/mutagen/id3/_frames.py b/lib/mutagen/id3/_frames.py old mode 100644 new mode 100755 index 75bfee53..5e3de5a6 --- a/lib/mutagen/id3/_frames.py +++ b/lib/mutagen/id3/_frames.py @@ -1,29 +1,25 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
import zlib -from warnings import warn from struct import unpack -from ._util import ( - ID3Warning, ID3JunkFrameError, ID3BadCompressedData, - ID3EncryptionUnsupportedError, ID3BadUnsynchData, unsynch) -from ._specs import ( - BinaryDataSpec, StringSpec, Latin1TextSpec, EncodedTextSpec, ByteSpec, - EncodingSpec, ASPIIndexSpec, SizedIntegerSpec, IntegerSpec, - VolumeAdjustmentsSpec, VolumePeakSpec, VolumeAdjustmentSpec, - ChannelSpec, MultiSpec, SynchronizedTextSpec, KeyEventSpec, TimeStampSpec, - EncodedNumericPartTextSpec, EncodedNumericTextSpec) -from .._compat import text_type, string_types, swap_to_string, iteritems - - -def is_valid_frame_id(frame_id): - return frame_id.isalnum() and frame_id.isupper() +from ._util import ID3JunkFrameError, ID3EncryptionUnsupportedError, unsynch, \ + ID3SaveConfig, error +from ._specs import BinaryDataSpec, StringSpec, Latin1TextSpec, \ + EncodedTextSpec, ByteSpec, EncodingSpec, ASPIIndexSpec, SizedIntegerSpec, \ + IntegerSpec, Encoding, VolumeAdjustmentsSpec, VolumePeakSpec, \ + VolumeAdjustmentSpec, ChannelSpec, MultiSpec, SynchronizedTextSpec, \ + KeyEventSpec, TimeStampSpec, EncodedNumericPartTextSpec, \ + EncodedNumericTextSpec, SpecError, PictureTypeSpec, ID3FramesSpec, \ + Latin1TextListSpec, CTOCFlagsSpec, FrameIDSpec, RVASpec +from .._compat import text_type, string_types, swap_to_string, iteritems, \ + izip, itervalues def _bytes2key(b): @@ -56,6 +52,7 @@ class Frame(object): FLAG24_DATALEN = 0x0001 _framespec = [] + _optionalspec = [] def __init__(self, *args, **kwargs): if len(args) == 1 and len(kwargs) == 0 and \ @@ -64,15 +61,30 @@ class Frame(object): # ask the sub class to fill in our data other._to_other(self) else: - for checker, val in zip(self._framespec, args): - setattr(self, checker.name, checker.validate(self, val)) + for checker, val in izip(self._framespec, args): + setattr(self, checker.name, val) for checker in self._framespec[len(args):]: - try: - validated = checker.validate( - self, 
kwargs.get(checker.name, None)) - except ValueError as e: - raise ValueError("%s: %s" % (checker.name, e)) - setattr(self, checker.name, validated) + setattr(self, checker.name, + kwargs.get(checker.name, checker.default)) + for spec in self._optionalspec: + if spec.name in kwargs: + setattr(self, spec.name, kwargs[spec.name]) + else: + break + + def __setattr__(self, name, value): + for checker in self._framespec: + if checker.name == name: + self._setattr(name, checker.validate(self, value)) + return + for checker in self._optionalspec: + if checker.name == name: + self._setattr(name, checker.validate(self, value)) + return + super(Frame, self).__setattr__(name, value) + + def _setattr(self, name, value): + self.__dict__[name] = value def _to_other(self, other): # this impl covers subclasses with the same framespec @@ -80,7 +92,35 @@ class Frame(object): raise ValueError for checker in other._framespec: - setattr(other, checker.name, getattr(self, checker.name)) + other._setattr(checker.name, getattr(self, checker.name)) + + # this impl covers subclasses with the same optionalspec + if other._optionalspec is not self._optionalspec: + raise ValueError + + for checker in other._optionalspec: + if hasattr(self, checker.name): + other._setattr(checker.name, getattr(self, checker.name)) + + def _merge_frame(self, other): + # default impl, use the new tag over the old one + return other + + def _upgrade_frame(self): + """Returns either this instance or a new instance if this is a v2.2 + frame and an upgrade to a v2.3/4 equivalent is viable. + + If this is a v2.2 instance and there is no upgrade path, returns None. + """ + + # turn 2.2 into 2.3/2.4 tags + if len(type(self).__name__) == 3: + base = type(self).__base__ + if base is Frame: + return + return base(self) + else: + return self def _get_v23_frame(self, **kwargs): """Returns a frame copy which is suitable for writing into a v2.3 tag. 
@@ -93,6 +133,13 @@ class Frame(object): name = checker.name value = getattr(self, name) new_kwargs[name] = checker._validate23(self, value, **kwargs) + + for checker in self._optionalspec: + name = checker.name + if hasattr(self, name): + value = getattr(self, name) + new_kwargs[name] = checker._validate23(self, value, **kwargs) + return type(self)(**new_kwargs) @property @@ -118,28 +165,64 @@ class Frame(object): # so repr works during __init__ if hasattr(self, attr.name): kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) + for attr in self._optionalspec: + if hasattr(self, attr.name): + kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) return '%s(%s)' % (type(self).__name__, ', '.join(kw)) - def _readData(self, data): - odata = data - for reader in self._framespec: - if len(data): - try: - value, data = reader.read(self, data) - except UnicodeDecodeError: - raise ID3JunkFrameError - else: - raise ID3JunkFrameError - setattr(self, reader.name, value) - if data.strip(b'\x00'): - warn('Leftover data: %s: %r (from %r)' % ( - type(self).__name__, data, odata), - ID3Warning) + def _readData(self, id3, data): + """Raises ID3JunkFrameError; Returns leftover data""" + + for reader in self._framespec: + if len(data) or reader.handle_nodata: + try: + value, data = reader.read(id3, self, data) + except SpecError as e: + raise ID3JunkFrameError(e) + else: + raise ID3JunkFrameError("no data left") + self._setattr(reader.name, value) + + for reader in self._optionalspec: + if len(data) or reader.handle_nodata: + try: + value, data = reader.read(id3, self, data) + except SpecError as e: + raise ID3JunkFrameError(e) + else: + break + self._setattr(reader.name, value) + + return data + + def _writeData(self, config=None): + """Raises error""" + + if config is None: + config = ID3SaveConfig() + + if config.v2_version == 3: + frame = self._get_v23_frame(sep=config.v23_separator) + else: + frame = self - def _writeData(self): data = [] for writer in 
self._framespec: - data.append(writer.write(self, getattr(self, writer.name))) + try: + data.append( + writer.write(config, frame, getattr(frame, writer.name))) + except SpecError as e: + raise error(e) + + for writer in self._optionalspec: + try: + data.append( + writer.write(config, frame, getattr(frame, writer.name))) + except AttributeError: + break + except SpecError as e: + raise error(e) + return b''.join(data) def pprint(self): @@ -150,10 +233,17 @@ class Frame(object): return "[unrepresentable data]" @classmethod - def fromData(cls, id3, tflags, data): - """Construct this ID3 frame from raw string data.""" + def _fromData(cls, header, tflags, data): + """Construct this ID3 frame from raw string data. - if id3._V24 <= id3.version: + Raises: + + ID3JunkFrameError in case parsing failed + NotImplementedError in case parsing isn't implemented + ID3EncryptionUnsupportedError in case the frame is encrypted. + """ + + if header.version >= header._V24: if tflags & (Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN): # The data length int is syncsafe in 2.4 (but not 2.3). # However, we don't actually need the data length int, @@ -161,12 +251,15 @@ class Frame(object): # all we need are the raw bytes. datalen_bytes = data[:4] data = data[4:] - if tflags & Frame.FLAG24_UNSYNCH or id3.f_unsynch: + if tflags & Frame.FLAG24_UNSYNCH or header.f_unsynch: try: data = unsynch.decode(data) - except ValueError as err: - if id3.PEDANTIC: - raise ID3BadUnsynchData('%s: %r' % (err, data)) + except ValueError: + # Some things write synch-unsafe data with either the frame + # or global unsynch flag set. Try to load them as is. 
+ # https://github.com/quodlibet/mutagen/issues/210 + # https://github.com/quodlibet/mutagen/issues/223 + pass if tflags & Frame.FLAG24_ENCRYPT: raise ID3EncryptionUnsupportedError if tflags & Frame.FLAG24_COMPRESS: @@ -179,10 +272,10 @@ class Frame(object): try: data = zlib.decompress(data) except zlib.error as err: - if id3.PEDANTIC: - raise ID3BadCompressedData('%s: %r' % (err, data)) + raise ID3JunkFrameError( + 'zlib: %s: %r' % (err, data)) - elif id3._V23 <= id3.version: + elif header.version >= header._V23: if tflags & Frame.FLAG23_COMPRESS: usize, = unpack('>L', data[:4]) data = data[4:] @@ -192,87 +285,96 @@ class Frame(object): try: data = zlib.decompress(data) except zlib.error as err: - if id3.PEDANTIC: - raise ID3BadCompressedData('%s: %r' % (err, data)) + raise ID3JunkFrameError('zlib: %s: %r' % (err, data)) frame = cls() - frame._rawdata = data - frame._flags = tflags - frame._readData(data) + frame._readData(header, data) return frame def __hash__(self): raise TypeError("Frame objects are unhashable") -class FrameOpt(Frame): - """A frame with optional parts. +class CHAP(Frame): + """Chapter""" - Some ID3 frames have optional data; this class extends Frame to - provide support for those parts. 
- """ + _framespec = [ + Latin1TextSpec("element_id"), + SizedIntegerSpec("start_time", 4, default=0), + SizedIntegerSpec("end_time", 4, default=0), + SizedIntegerSpec("start_offset", 4, default=0xffffffff), + SizedIntegerSpec("end_offset", 4, default=0xffffffff), + ID3FramesSpec("sub_frames"), + ] - _optionalspec = [] + @property + def HashKey(self): + return '%s:%s' % (self.FrameID, self.element_id) - def __init__(self, *args, **kwargs): - super(FrameOpt, self).__init__(*args, **kwargs) - for spec in self._optionalspec: - if spec.name in kwargs: - validated = spec.validate(self, kwargs[spec.name]) - setattr(self, spec.name, validated) - else: - break + def __eq__(self, other): + if not isinstance(other, CHAP): + return False - def _to_other(self, other): - super(FrameOpt, self)._to_other(other) + self_frames = self.sub_frames or {} + other_frames = other.sub_frames or {} + if sorted(self_frames.values()) != sorted(other_frames.values()): + return False - # this impl covers subclasses with the same optionalspec - if other._optionalspec is not self._optionalspec: - raise ValueError + return self.element_id == other.element_id and \ + self.start_time == other.start_time and \ + self.end_time == other.end_time and \ + self.start_offset == other.start_offset and \ + self.end_offset == other.end_offset - for checker in other._optionalspec: - if hasattr(self, checker.name): - setattr(other, checker.name, getattr(self, checker.name)) + __hash__ = Frame.__hash__ - def _readData(self, data): - odata = data - for reader in self._framespec: - if len(data): - value, data = reader.read(self, data) - else: - raise ID3JunkFrameError - setattr(self, reader.name, value) - if data: - for reader in self._optionalspec: - if len(data): - value, data = reader.read(self, data) - else: - break - setattr(self, reader.name, value) - if data.strip(b'\x00'): - warn('Leftover data: %s: %r (from %r)' % ( - type(self).__name__, data, odata), - ID3Warning) + def _pprint(self): + frame_pprint = 
u"" + for frame in itervalues(self.sub_frames): + for line in frame.pprint().splitlines(): + frame_pprint += "\n" + " " * 4 + line + return u"%s time=%d..%d offset=%d..%d%s" % ( + self.element_id, self.start_time, self.end_time, + self.start_offset, self.end_offset, frame_pprint) - def _writeData(self): - data = [] - for writer in self._framespec: - data.append(writer.write(self, getattr(self, writer.name))) - for writer in self._optionalspec: - try: - data.append(writer.write(self, getattr(self, writer.name))) - except AttributeError: - break - return b''.join(data) - def __repr__(self): - kw = [] - for attr in self._framespec: - kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) - for attr in self._optionalspec: - if hasattr(self, attr.name): - kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) - return '%s(%s)' % (type(self).__name__, ', '.join(kw)) +class CTOC(Frame): + """Table of contents""" + + _framespec = [ + Latin1TextSpec("element_id"), + CTOCFlagsSpec("flags", default=0), + Latin1TextListSpec("child_element_ids"), + ID3FramesSpec("sub_frames"), + ] + + @property + def HashKey(self): + return '%s:%s' % (self.FrameID, self.element_id) + + __hash__ = Frame.__hash__ + + def __eq__(self, other): + if not isinstance(other, CTOC): + return False + + self_frames = self.sub_frames or {} + other_frames = other.sub_frames or {} + if sorted(self_frames.values()) != sorted(other_frames.values()): + return False + + return self.element_id == other.element_id and \ + self.flags == other.flags and \ + self.child_element_ids == other.child_element_ids + + def _pprint(self): + frame_pprint = u"" + if getattr(self, "sub_frames", None): + frame_pprint += "\n" + "\n".join( + [" " * 4 + f.pprint() for f in self.sub_frames.values()]) + return u"%s flags=%d child_element_ids=%s%s" % ( + self.element_id, int(self.flags), + u",".join(self.child_element_ids), frame_pprint) @swap_to_string @@ -292,8 +394,8 @@ class TextFrame(Frame): """ _framespec = [ - 
EncodingSpec('encoding'), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] def __bytes__(self): @@ -327,6 +429,13 @@ class TextFrame(Frame): return self.text.extend(value) + def _merge_frame(self, other): + # merge in new values + for val in other[:]: + if val not in self: + self.append(val) + return self + def _pprint(self): return " / ".join(self.text) @@ -341,8 +450,9 @@ class NumericTextFrame(TextFrame): """ _framespec = [ - EncodingSpec('encoding'), - MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000'), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000', + default=[]), ] def __pos__(self): @@ -361,8 +471,9 @@ class NumericPartTextFrame(TextFrame): """ _framespec = [ - EncodingSpec('encoding'), - MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000'), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000', + default=[]), ] def __pos__(self): @@ -378,8 +489,8 @@ class TimeStampTextFrame(TextFrame): """ _framespec = [ - EncodingSpec('encoding'), - MultiSpec('text', TimeStampSpec('stamp'), sep=u','), + EncodingSpec('encoding', default=Encoding.UTF16), + MultiSpec('text', TimeStampSpec('stamp'), sep=u',', default=[]), ] def __bytes__(self): @@ -405,7 +516,9 @@ class UrlFrame(Frame): ASCII. 
""" - _framespec = [Latin1TextSpec('url')] + _framespec = [ + Latin1TextSpec('url'), + ] def __bytes__(self): return self.url.encode('utf-8') @@ -532,6 +645,30 @@ class TDES(TextFrame): "iTunes Podcast Description" +class TKWD(TextFrame): + "iTunes Podcast Keywords" + + +class TCAT(TextFrame): + "iTunes Podcast Category" + + +class MVNM(TextFrame): + "iTunes Movement Name" + + +class MVN(MVNM): + "iTunes Movement Name" + + +class MVIN(NumericPartTextFrame): + "iTunes Movement Number/Count" + + +class MVI(MVIN): + "iTunes Movement Number/Count" + + class TDOR(TimeStampTextFrame): "Original Release Time" @@ -723,7 +860,7 @@ class TXXX(TextFrame): _framespec = [ EncodingSpec('encoding'), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] @property @@ -777,7 +914,7 @@ class WXXX(UrlFrame): """ _framespec = [ - EncodingSpec('encoding'), + EncodingSpec('encoding', default=Encoding.UTF16), EncodedTextSpec('desc'), Latin1TextSpec('url'), ] @@ -800,10 +937,10 @@ class PairedTextFrame(Frame): """ _framespec = [ - EncodingSpec('encoding'), + EncodingSpec('encoding', default=Encoding.UTF16), MultiSpec('people', EncodedTextSpec('involvement'), - EncodedTextSpec('person')) + EncodedTextSpec('person'), default=[]) ] def __eq__(self, other): @@ -830,7 +967,9 @@ class BinaryFrame(Frame): The 'data' attribute contains the raw byte string. 
""" - _framespec = [BinaryDataSpec('data')] + _framespec = [ + BinaryDataSpec('data'), + ] def __eq__(self, other): return self.data == other @@ -846,8 +985,8 @@ class ETCO(Frame): """Event timing codes.""" _framespec = [ - ByteSpec("format"), - KeyEventSpec("events"), + ByteSpec("format", default=1), + KeyEventSpec("events", default=[]), ] def __eq__(self, other): @@ -864,11 +1003,11 @@ class MLLT(Frame): """ _framespec = [ - SizedIntegerSpec('frames', 2), - SizedIntegerSpec('bytes', 3), - SizedIntegerSpec('milliseconds', 3), - ByteSpec('bits_for_bytes'), - ByteSpec('bits_for_milliseconds'), + SizedIntegerSpec('frames', size=2, default=0), + SizedIntegerSpec('bytes', size=3, default=0), + SizedIntegerSpec('milliseconds', size=3, default=0), + ByteSpec('bits_for_bytes', default=0), + ByteSpec('bits_for_milliseconds', default=0), BinaryDataSpec('data'), ] @@ -886,7 +1025,7 @@ class SYTC(Frame): """ _framespec = [ - ByteSpec("format"), + ByteSpec("format", default=1), BinaryDataSpec("data"), ] @@ -905,8 +1044,8 @@ class USLT(Frame): """ _framespec = [ - EncodingSpec('encoding'), - StringSpec('lang', 3), + EncodingSpec('encoding', default=Encoding.UTF16), + StringSpec('lang', length=3, default=u"XXX"), EncodedTextSpec('desc'), EncodedTextSpec('text'), ] @@ -933,9 +1072,9 @@ class SYLT(Frame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', 3), - ByteSpec('format'), - ByteSpec('type'), + StringSpec('lang', length=3, default=u"XXX"), + ByteSpec('format', default=1), + ByteSpec('type', default=0), EncodedTextSpec('desc'), SynchronizedTextSpec('text'), ] @@ -965,9 +1104,9 @@ class COMM(TextFrame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', 3), + StringSpec('lang', length=3, default="XXX"), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] @property @@ -997,9 +1136,9 @@ class RVA2(Frame): _framespec = [ Latin1TextSpec('desc'), - 
ChannelSpec('channel'), - VolumeAdjustmentSpec('gain'), - VolumePeakSpec('peak'), + ChannelSpec('channel', default=1), + VolumeAdjustmentSpec('gain', default=1), + VolumePeakSpec('peak', default=1), ] _channels = ["Other", "Master volume", "Front right", "Front left", @@ -1037,9 +1176,9 @@ class EQU2(Frame): """ _framespec = [ - ByteSpec("method"), + ByteSpec("method", default=0), Latin1TextSpec("desc"), - VolumeAdjustmentsSpec("adjustments"), + VolumeAdjustmentsSpec("adjustments", default=[]), ] def __eq__(self, other): @@ -1052,7 +1191,21 @@ class EQU2(Frame): return '%s:%s' % (self.FrameID, self.desc) -# class RVAD: unsupported +class RVAD(Frame): + """Relative volume adjustment""" + + _framespec = [ + RVASpec("adjustments", stereo_only=False), + ] + + __hash__ = Frame.__hash__ + + def __eq__(self, other): + if not isinstance(other, RVAD): + return False + return self.adjustments == other.adjustments + + # class EQUA: unsupported @@ -1060,16 +1213,16 @@ class RVRB(Frame): """Reverb.""" _framespec = [ - SizedIntegerSpec('left', 2), - SizedIntegerSpec('right', 2), - ByteSpec('bounce_left'), - ByteSpec('bounce_right'), - ByteSpec('feedback_ltl'), - ByteSpec('feedback_ltr'), - ByteSpec('feedback_rtr'), - ByteSpec('feedback_rtl'), - ByteSpec('premix_ltr'), - ByteSpec('premix_rtl'), + SizedIntegerSpec('left', size=2, default=0), + SizedIntegerSpec('right', size=2, default=0), + ByteSpec('bounce_left', default=0), + ByteSpec('bounce_right', default=0), + ByteSpec('feedback_ltl', default=0), + ByteSpec('feedback_ltr', default=0), + ByteSpec('feedback_rtr', default=0), + ByteSpec('feedback_rtl', default=0), + ByteSpec('premix_ltr', default=0), + ByteSpec('premix_rtl', default=0), ] def __eq__(self, other): @@ -1095,7 +1248,7 @@ class APIC(Frame): _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('mime'), - ByteSpec('type'), + PictureTypeSpec('type'), EncodedTextSpec('desc'), BinaryDataSpec('data'), ] @@ -1109,15 +1262,17 @@ class APIC(Frame): def HashKey(self): 
return '%s:%s' % (self.FrameID, self.desc) - def _validate_from_22(self, other, checker): - if checker.name == "mime": - self.mime = other.mime.decode("ascii", "ignore") - else: - super(APIC, self)._validate_from_22(other, checker) + def _merge_frame(self, other): + other.desc += u" " + return other def _pprint(self): - return "%s (%s, %d bytes)" % ( - self.desc, self.mime, len(self.data)) + type_desc = text_type(self.type) + if hasattr(self.type, "_pprint"): + type_desc = self.type._pprint() + + return "%s, %s (%s, %d bytes)" % ( + type_desc, self.desc, self.mime, len(self.data)) class PCNT(Frame): @@ -1129,7 +1284,9 @@ class PCNT(Frame): This frame is basically obsoleted by POPM. """ - _framespec = [IntegerSpec('count')] + _framespec = [ + IntegerSpec('count', default=0), + ] def __eq__(self, other): return self.count == other @@ -1143,7 +1300,26 @@ class PCNT(Frame): return text_type(self.count) -class POPM(FrameOpt): +class PCST(Frame): + """iTunes Podcast Flag""" + + _framespec = [ + IntegerSpec('value', default=0), + ] + + def __eq__(self, other): + return self.value == other + + __hash__ = Frame.__hash__ + + def __pos__(self): + return self.value + + def _pprint(self): + return text_type(self.value) + + +class POPM(Frame): """Popularimeter. This frame keys a rating (out of 255) and a play count to an email @@ -1158,10 +1334,12 @@ class POPM(FrameOpt): _framespec = [ Latin1TextSpec('email'), - ByteSpec('rating'), + ByteSpec('rating', default=0), ] - _optionalspec = [IntegerSpec('count')] + _optionalspec = [ + IntegerSpec('count', default=0), + ] @property def HashKey(self): @@ -1170,7 +1348,7 @@ class POPM(FrameOpt): def __eq__(self, other): return self.rating == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ def __pos__(self): return self.rating @@ -1212,7 +1390,7 @@ class GEOB(Frame): __hash__ = Frame.__hash__ -class RBUF(FrameOpt): +class RBUF(Frame): """Recommended buffer size. 
Attributes: @@ -1224,24 +1402,26 @@ class RBUF(FrameOpt): Mutagen will not find the next tag itself. """ - _framespec = [SizedIntegerSpec('size', 3)] + _framespec = [ + SizedIntegerSpec('size', size=3, default=0), + ] _optionalspec = [ - ByteSpec('info'), - SizedIntegerSpec('offset', 4), + ByteSpec('info', default=0), + SizedIntegerSpec('offset', size=4, default=0), ] def __eq__(self, other): return self.size == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ def __pos__(self): return self.size @swap_to_string -class AENC(FrameOpt): +class AENC(Frame): """Audio encryption. Attributes: @@ -1256,12 +1436,11 @@ class AENC(FrameOpt): _framespec = [ Latin1TextSpec('owner'), - SizedIntegerSpec('preview_start', 2), - SizedIntegerSpec('preview_length', 2), + SizedIntegerSpec('preview_start', size=2, default=0), + SizedIntegerSpec('preview_length', size=2, default=0), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - @property def HashKey(self): return '%s:%s' % (self.FrameID, self.owner) @@ -1275,10 +1454,10 @@ class AENC(FrameOpt): def __eq__(self, other): return self.owner == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ -class LINK(FrameOpt): +class LINK(Frame): """Linked information. 
Attributes: @@ -1289,27 +1468,20 @@ class LINK(FrameOpt): """ _framespec = [ - StringSpec('frameid', 4), + FrameIDSpec('frameid', length=4), Latin1TextSpec('url'), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - @property def HashKey(self): - try: - return "%s:%s:%s:%s" % ( - self.FrameID, self.frameid, self.url, _bytes2key(self.data)) - except AttributeError: - return "%s:%s:%s" % (self.FrameID, self.frameid, self.url) + return "%s:%s:%s:%s" % ( + self.FrameID, self.frameid, self.url, _bytes2key(self.data)) def __eq__(self, other): - try: - return (self.frameid, self.url, self.data) == other - except AttributeError: - return (self.frameid, self.url) == other + return (self.frameid, self.url, self.data) == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ class POSS(Frame): @@ -1322,8 +1494,8 @@ class POSS(Frame): """ _framespec = [ - ByteSpec('format'), - IntegerSpec('position'), + ByteSpec('format', default=1), + IntegerSpec('position', default=0), ] def __pos__(self): @@ -1378,7 +1550,7 @@ class USER(Frame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', 3), + StringSpec('lang', length=3, default=u"XXX"), EncodedTextSpec('text'), ] @@ -1408,7 +1580,7 @@ class OWNE(Frame): _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('date', 8), + StringSpec('date', length=8, default=u"19700101"), EncodedTextSpec('seller'), ] @@ -1424,15 +1596,15 @@ class OWNE(Frame): __hash__ = Frame.__hash__ -class COMR(FrameOpt): +class COMR(Frame): """Commercial frame.""" _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('valid_until', 8), + StringSpec('valid_until', length=8, default=u"19700101"), Latin1TextSpec('contact'), - ByteSpec('format'), + ByteSpec('format', default=0), EncodedTextSpec('seller'), EncodedTextSpec('desc'), ] @@ -1449,7 +1621,7 @@ class COMR(FrameOpt): def __eq__(self, other): return self._writeData() == other._writeData() - __hash__ = FrameOpt.__hash__ 
+ __hash__ = Frame.__hash__ @swap_to_string @@ -1462,7 +1634,7 @@ class ENCR(Frame): _framespec = [ Latin1TextSpec('owner'), - ByteSpec('method'), + ByteSpec('method', default=0x80), BinaryDataSpec('data'), ] @@ -1480,16 +1652,15 @@ class ENCR(Frame): @swap_to_string -class GRID(FrameOpt): +class GRID(Frame): """Group identification registration.""" _framespec = [ Latin1TextSpec('owner'), - ByteSpec('group'), + ByteSpec('group', default=0x80), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - @property def HashKey(self): return '%s:%s' % (self.FrameID, self.group) @@ -1506,7 +1677,7 @@ class GRID(FrameOpt): def __eq__(self, other): return self.owner == other or self.group == other - __hash__ = FrameOpt.__hash__ + __hash__ = Frame.__hash__ @swap_to_string @@ -1540,7 +1711,7 @@ class SIGN(Frame): """Signature frame.""" _framespec = [ - ByteSpec('group'), + ByteSpec('group', default=0x80), BinaryDataSpec('sig'), ] @@ -1563,7 +1734,9 @@ class SEEK(Frame): Mutagen does not find tags at seek offsets. """ - _framespec = [IntegerSpec('offset')] + _framespec = [ + IntegerSpec('offset', default=0), + ] def __pos__(self): return self.offset @@ -1580,12 +1753,13 @@ class ASPI(Frame): Attributes: S, L, N, b, and Fi. For the meaning of these, see the ID3v2.4 specification. Fi is a list of integers. 
""" + _framespec = [ - SizedIntegerSpec("S", 4), - SizedIntegerSpec("L", 4), - SizedIntegerSpec("N", 2), - ByteSpec("b"), - ASPIIndexSpec("Fi"), + SizedIntegerSpec("S", size=4, default=0), + SizedIntegerSpec("L", size=4, default=0), + SizedIntegerSpec("N", size=2, default=0), + ByteSpec("b", default=0), + ASPIIndexSpec("Fi", default=[]), ] def __eq__(self, other): @@ -1703,6 +1877,26 @@ class TEN(TENC): "Encoder" +class TST(TSOT): + "Title Sort Order key" + + +class TSA(TSOA): + "Album Sort Order key" + + +class TS2(TSO2): + "iTunes Album Artist Sort" + + +class TSP(TSOP): + "Perfomer Sort Order key" + + +class TSC(TSOC): + "iTunes Composer Sort" + + class TSS(TSSE): "Encoder settings" @@ -1807,7 +2001,19 @@ class COM(COMM): "Comment" -# class RVA(RVAD) +class RVA(RVAD): + "Relative volume adjustment" + + _framespec = [ + RVASpec("adjustments", stereo_only=True), + ] + + def _to_other(self, other): + if not isinstance(other, RVAD): + raise TypeError + + other.adjustments = list(self.adjustments) + # class EQU(EQUA) @@ -1824,10 +2030,10 @@ class PIC(APIC): _framespec = [ EncodingSpec('encoding'), - StringSpec('mime', 3), - ByteSpec('type'), + StringSpec('mime', length=3, default="JPG"), + PictureTypeSpec('type'), EncodedTextSpec('desc'), - BinaryDataSpec('data') + BinaryDataSpec('data'), ] def _to_other(self, other): @@ -1859,8 +2065,12 @@ class BUF(RBUF): class CRM(Frame): """Encrypted meta frame""" - _framespec = [Latin1TextSpec('owner'), Latin1TextSpec('desc'), - BinaryDataSpec('data')] + + _framespec = [ + Latin1TextSpec('owner'), + Latin1TextSpec('desc'), + BinaryDataSpec('data'), + ] def __eq__(self, other): return self.data == other @@ -1875,20 +2085,28 @@ class LNK(LINK): """Linked information""" _framespec = [ - StringSpec('frameid', 3), - Latin1TextSpec('url') + FrameIDSpec('frameid', length=3), + Latin1TextSpec('url'), + BinaryDataSpec('data'), ] - _optionalspec = [BinaryDataSpec('data')] - def _to_other(self, other): if not isinstance(other, LINK): raise 
TypeError - other.frameid = self.frameid + if isinstance(other, LNK): + new_frameid = self.frameid + else: + try: + new_frameid = Frames_2_2[self.frameid].__bases__[0].__name__ + except KeyError: + new_frameid = self.frameid.ljust(4) + + # we could end up with invalid IDs here, so bypass the validation + other._setattr("frameid", new_frameid) + other.url = self.url - if hasattr(self, "data"): - other.data = self.data + other.data = self.data Frames = {} diff --git a/lib/mutagen/id3/_id3v1.py b/lib/mutagen/id3/_id3v1.py new file mode 100755 index 00000000..d41d00d0 --- /dev/null +++ b/lib/mutagen/id3/_id3v1.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2005 Michael Urman +# 2006 Lukas Lalinsky +# 2013 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import errno +from struct import error as StructError, unpack + +from mutagen._util import chr_, text_type + +from ._frames import TCON, TRCK, COMM, TDRC, TALB, TPE1, TIT2 + + +def find_id3v1(fileobj): + """Returns a tuple of (id3tag, offset_to_end) or (None, 0) + + offset mainly because we used to write too short tags in some cases and + we need the offset to delete them. + """ + + # id3v1 is always at the end (after apev2) + + extra_read = b"APETAGEX".index(b"TAG") + + try: + fileobj.seek(-128 - extra_read, 2) + except IOError as e: + if e.errno == errno.EINVAL: + # If the file is too small, might be ok since we wrote too small + # tags at some point. let's see how the parsing goes.. 
+ fileobj.seek(0, 0) + else: + raise + + data = fileobj.read(128 + extra_read) + try: + idx = data.index(b"TAG") + except ValueError: + return (None, 0) + else: + # FIXME: make use of the apev2 parser here + # if TAG is part of APETAGEX assume this is an APEv2 tag + try: + ape_idx = data.index(b"APETAGEX") + except ValueError: + pass + else: + if idx == ape_idx + extra_read: + return (None, 0) + + tag = ParseID3v1(data[idx:]) + if tag is None: + return (None, 0) + + offset = idx - len(data) + return (tag, offset) + + +# ID3v1.1 support. +def ParseID3v1(data): + """Parse an ID3v1 tag, returning a list of ID3v2.4 frames. + + Returns a {frame_name: frame} dict or None. + """ + + try: + data = data[data.index(b"TAG"):] + except ValueError: + return None + if 128 < len(data) or len(data) < 124: + return None + + # Issue #69 - Previous versions of Mutagen, when encountering + # out-of-spec TDRC and TYER frames of less than four characters, + # wrote only the characters available - e.g. "1" or "" - into the + # year field. To parse those, reduce the size of the year field. + # Amazingly, "0s" works as a struct format string. 
+ unpack_fmt = "3s30s30s30s%ds29sBB" % (len(data) - 124) + + try: + tag, title, artist, album, year, comment, track, genre = unpack( + unpack_fmt, data) + except StructError: + return None + + if tag != b"TAG": + return None + + def fix(data): + return data.split(b"\x00")[0].strip().decode('latin1') + + title, artist, album, year, comment = map( + fix, [title, artist, album, year, comment]) + + frames = {} + if title: + frames["TIT2"] = TIT2(encoding=0, text=title) + if artist: + frames["TPE1"] = TPE1(encoding=0, text=[artist]) + if album: + frames["TALB"] = TALB(encoding=0, text=album) + if year: + frames["TDRC"] = TDRC(encoding=0, text=year) + if comment: + frames["COMM"] = COMM( + encoding=0, lang="eng", desc="ID3v1 Comment", text=comment) + # Don't read a track number if it looks like the comment was + # padded with spaces instead of nulls (thanks, WinAmp). + if track and ((track != 32) or (data[-3] == b'\x00'[0])): + frames["TRCK"] = TRCK(encoding=0, text=str(track)) + if genre != 255: + frames["TCON"] = TCON(encoding=0, text=str(genre)) + return frames + + +def MakeID3v1(id3): + """Return an ID3v1.1 tag string from a dict of ID3v2.4 frames.""" + + v1 = {} + + for v2id, name in {"TIT2": "title", "TPE1": "artist", + "TALB": "album"}.items(): + if v2id in id3: + text = id3[v2id].text[0].encode('latin1', 'replace')[:30] + else: + text = b"" + v1[name] = text + (b"\x00" * (30 - len(text))) + + if "COMM" in id3: + cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28] + else: + cmnt = b"" + v1["comment"] = cmnt + (b"\x00" * (29 - len(cmnt))) + + if "TRCK" in id3: + try: + v1["track"] = chr_(+id3["TRCK"]) + except ValueError: + v1["track"] = b"\x00" + else: + v1["track"] = b"\x00" + + if "TCON" in id3: + try: + genre = id3["TCON"].genres[0] + except IndexError: + pass + else: + if genre in TCON.GENRES: + v1["genre"] = chr_(TCON.GENRES.index(genre)) + if "genre" not in v1: + v1["genre"] = b"\xff" + + if "TDRC" in id3: + year = 
text_type(id3["TDRC"]).encode('ascii') + elif "TYER" in id3: + year = text_type(id3["TYER"]).encode('ascii') + else: + year = b"" + v1["year"] = (year + b"\x00\x00\x00\x00")[:4] + + return ( + b"TAG" + + v1["title"] + + v1["artist"] + + v1["album"] + + v1["year"] + + v1["comment"] + + v1["track"] + + v1["genre"] + ) diff --git a/lib/mutagen/id3/_specs.py b/lib/mutagen/id3/_specs.py old mode 100644 new mode 100755 index 351d5319..bb27f72c --- a/lib/mutagen/id3/_specs.py +++ b/lib/mutagen/id3/_specs.py @@ -1,24 +1,119 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import struct +import codecs from struct import unpack, pack -from warnings import warn -from .._compat import text_type, chr_, PY3, swap_to_string, string_types -from .._util import total_ordering, decode_terminated, enum -from ._util import ID3JunkFrameError, ID3Warning, BitPaddedInt +from .._compat import text_type, chr_, PY3, swap_to_string, string_types, \ + xrange +from .._util import total_ordering, decode_terminated, enum, izip, flags, \ + cdata, encode_endian +from ._util import BitPaddedInt, is_valid_frame_id + + +@enum +class PictureType(object): + """Enumeration of image types defined by the ID3 standard for the APIC + frame, but also reused in WMA/FLAC/VorbisComment. + """ + + OTHER = 0 + """Other""" + + FILE_ICON = 1 + """32x32 pixels 'file icon' (PNG only)""" + + OTHER_FILE_ICON = 2 + """Other file icon""" + + COVER_FRONT = 3 + """Cover (front)""" + + COVER_BACK = 4 + """Cover (back)""" + + LEAFLET_PAGE = 5 + """Leaflet page""" + + MEDIA = 6 + """Media (e.g. 
label side of CD)""" + + LEAD_ARTIST = 7 + """Lead artist/lead performer/soloist""" + + ARTIST = 8 + """Artist/performer""" + + CONDUCTOR = 9 + """Conductor""" + + BAND = 10 + """Band/Orchestra""" + + COMPOSER = 11 + """Composer""" + + LYRICIST = 12 + """Lyricist/text writer""" + + RECORDING_LOCATION = 13 + """Recording Location""" + + DURING_RECORDING = 14 + """During recording""" + + DURING_PERFORMANCE = 15 + """During performance""" + + SCREEN_CAPTURE = 16 + """Movie/video screen capture""" + + FISH = 17 + """A bright coloured fish""" + + ILLUSTRATION = 18 + """Illustration""" + + BAND_LOGOTYPE = 19 + """Band/artist logotype""" + + PUBLISHER_LOGOTYPE = 20 + """Publisher/Studio logotype""" + + def _pprint(self): + return text_type(self).split(".", 1)[-1].lower().replace("_", " ") + + +@flags +class CTOCFlags(object): + + TOP_LEVEL = 0x2 + """Identifies the CTOC root frame""" + + ORDERED = 0x1 + """Child elements are ordered""" + + +class SpecError(Exception): + pass class Spec(object): - def __init__(self, name): + handle_nodata = False + """If reading empty data is possible and writing it back will again + result in no data. 
+ """ + + def __init__(self, name, default): self.name = name + self.default = default def __hash__(self): raise TypeError("Spec objects are unhashable") @@ -30,23 +125,46 @@ class Spec(object): return value - def read(self, frame, value): + def read(self, header, frame, data): + """ + Returns: + (value: object, left_data: bytes) + Raises: + SpecError + """ + raise NotImplementedError - def write(self, frame, value): + def write(self, config, frame, value): + """ + Returns: + bytes: The serialized data + Raises: + SpecError + """ raise NotImplementedError def validate(self, frame, value): - """Returns the validated data or raises ValueError/TypeError""" + """ + Returns: + the validated value + Raises: + ValueError + TypeError + """ raise NotImplementedError class ByteSpec(Spec): - def read(self, frame, data): + + def __init__(self, name, default=0): + super(ByteSpec, self).__init__(name, default) + + def read(self, header, frame, data): return bytearray(data)[0], data[1:] - def write(self, frame, value): + def write(self, config, frame, value): return chr_(value) def validate(self, frame, value): @@ -55,11 +173,40 @@ class ByteSpec(Spec): return value +class PictureTypeSpec(ByteSpec): + + def __init__(self, name, default=PictureType.COVER_FRONT): + super(PictureTypeSpec, self).__init__(name, default) + + def read(self, header, frame, data): + value, data = ByteSpec.read(self, header, frame, data) + return PictureType(value), data + + def validate(self, frame, value): + value = ByteSpec.validate(self, frame, value) + if value is not None: + return PictureType(value) + return value + + +class CTOCFlagsSpec(ByteSpec): + + def read(self, header, frame, data): + value, data = ByteSpec.read(self, header, frame, data) + return CTOCFlags(value), data + + def validate(self, frame, value): + value = ByteSpec.validate(self, frame, value) + if value is not None: + return CTOCFlags(value) + return value + + class IntegerSpec(Spec): - def read(self, frame, data): + def 
read(self, header, frame, data): return int(BitPaddedInt(data, bits=8)), b'' - def write(self, frame, value): + def write(self, config, frame, value): return BitPaddedInt.to_str(value, bits=8, width=-1) def validate(self, frame, value): @@ -67,13 +214,15 @@ class IntegerSpec(Spec): class SizedIntegerSpec(Spec): - def __init__(self, name, size): - self.name, self.__sz = name, size - def read(self, frame, data): + def __init__(self, name, size, default): + self.name, self.__sz = name, size + self.default = default + + def read(self, header, frame, data): return int(BitPaddedInt(data[:self.__sz], bits=8)), data[self.__sz:] - def write(self, frame, value): + def write(self, config, frame, value): return BitPaddedInt.to_str(value, bits=8, width=self.__sz) def validate(self, frame, value): @@ -82,84 +231,197 @@ class SizedIntegerSpec(Spec): @enum class Encoding(object): + """Text Encoding""" + LATIN1 = 0 + """ISO-8859-1""" + UTF16 = 1 + """UTF-16 with BOM""" + UTF16BE = 2 + """UTF-16BE without BOM""" + UTF8 = 3 + """UTF-8""" class EncodingSpec(ByteSpec): - def read(self, frame, data): - enc, data = super(EncodingSpec, self).read(frame, data) - if enc < 16: - return enc, data - else: - return 0, chr_(enc) + data + + def __init__(self, name, default=Encoding.UTF16): + super(EncodingSpec, self).__init__(name, default) + + def read(self, header, frame, data): + enc, data = super(EncodingSpec, self).read(header, frame, data) + if enc not in (Encoding.LATIN1, Encoding.UTF16, Encoding.UTF16BE, + Encoding.UTF8): + raise SpecError('Invalid Encoding: %r' % enc) + return Encoding(enc), data def validate(self, frame, value): if value is None: - return None - if 0 <= value <= 3: - return value - raise ValueError('Invalid Encoding: %r' % value) + raise TypeError + if value not in (Encoding.LATIN1, Encoding.UTF16, Encoding.UTF16BE, + Encoding.UTF8): + raise ValueError('Invalid Encoding: %r' % value) + return Encoding(value) def _validate23(self, frame, value, **kwargs): # only 0, 1 are 
valid in v2.3, default to utf-16 - return min(1, value) + if value not in (Encoding.LATIN1, Encoding.UTF16): + value = Encoding.UTF16 + return value class StringSpec(Spec): """A fixed size ASCII only payload.""" - def __init__(self, name, length): - super(StringSpec, self).__init__(name) + def __init__(self, name, length, default=None): + if default is None: + default = u" " * length + super(StringSpec, self).__init__(name, default) self.len = length - def read(s, frame, data): + def read(s, header, frame, data): chunk = data[:s.len] try: ascii = chunk.decode("ascii") except UnicodeDecodeError: - raise ID3JunkFrameError("not ascii") + raise SpecError("not ascii") else: if PY3: chunk = ascii return chunk, data[s.len:] - def write(s, frame, value): - if value is None: - return b'\x00' * s.len - else: - if PY3: - value = value.encode("ascii") - return (bytes(value) + b'\x00' * s.len)[:s.len] + def write(self, config, frame, value): + if PY3: + value = value.encode("ascii") + return (bytes(value) + b'\x00' * self.len)[:self.len] - def validate(s, frame, value): + def validate(self, frame, value): if value is None: - return None - + raise TypeError if PY3: if not isinstance(value, str): - raise TypeError("%s has to be str" % s.name) + raise TypeError("%s has to be str" % self.name) value.encode("ascii") else: if not isinstance(value, bytes): value = value.encode("ascii") - if len(value) == s.len: + if len(value) == self.len: return value - raise ValueError('Invalid StringSpec[%d] data: %r' % (s.len, value)) + raise ValueError('Invalid StringSpec[%d] data: %r' % (self.len, value)) + + +class RVASpec(Spec): + + def __init__(self, name, stereo_only, default=[0, 0]): + # two_chan: RVA has only 2 channels, while RVAD has 6 channels + super(RVASpec, self).__init__(name, default) + self._max_values = 4 if stereo_only else 12 + + def read(self, header, frame, data): + # inc/dec flags + spec = ByteSpec("flags", 0) + flags, data = spec.read(header, frame, data) + if not data: + 
raise SpecError("truncated") + + # how many bytes per value + bits, data = spec.read(header, frame, data) + if bits == 0: + # not allowed according to spec + raise SpecError("bits used has to be > 0") + bytes_per_value = (bits + 7) // 8 + + values = [] + while len(data) >= bytes_per_value and len(values) < self._max_values: + v = BitPaddedInt(data[:bytes_per_value], bits=8) + data = data[bytes_per_value:] + values.append(v) + + if len(values) < 2: + raise SpecError("First two values not optional") + + # if the respective flag bit is zero, take as decrement + for bit, index in enumerate([0, 1, 4, 5, 8, 10]): + if not cdata.test_bit(flags, bit): + try: + values[index] = -values[index] + except IndexError: + break + + return values, data + + def write(self, config, frame, values): + if len(values) < 2 or len(values) > self._max_values: + raise SpecError( + "at least two volume change values required, max %d" % + self._max_values) + + spec = ByteSpec("flags", 0) + + flags = 0 + values = list(values) + for bit, index in enumerate([0, 1, 4, 5, 8, 10]): + try: + if values[index] < 0: + values[index] = -values[index] + else: + flags |= (1 << bit) + except IndexError: + break + + buffer_ = bytearray() + buffer_.extend(spec.write(config, frame, flags)) + + # serialized and make them all the same size (min 2 bytes) + byte_values = [ + BitPaddedInt.to_str(v, bits=8, width=-1, minwidth=2) + for v in values] + max_bytes = max([len(v) for v in byte_values]) + byte_values = [v.ljust(max_bytes, b"\x00") for v in byte_values] + + bits = max_bytes * 8 + buffer_.extend(spec.write(config, frame, bits)) + + for v in byte_values: + buffer_.extend(v) + + return bytes(buffer_) + + def validate(self, frame, values): + if len(values) < 2 or len(values) > self._max_values: + raise ValueError("needs list of length 2..%d" % self._max_values) + return values + + +class FrameIDSpec(StringSpec): + + def __init__(self, name, length): + super(FrameIDSpec, self).__init__(name, length, u"X" * length) 
+ + def validate(self, frame, value): + value = super(FrameIDSpec, self).validate(frame, value) + if not is_valid_frame_id(value): + raise ValueError("Invalid frame ID") + return value class BinaryDataSpec(Spec): - def read(self, frame, data): + + handle_nodata = True + + def __init__(self, name, default=b""): + super(BinaryDataSpec, self).__init__(name, default) + + def read(self, header, frame, data): return data, b'' - def write(self, frame, value): - if value is None: - return b"" + def write(self, config, frame, value): if isinstance(value, bytes): return value value = text_type(value).encode("ascii") @@ -167,8 +429,7 @@ class BinaryDataSpec(Spec): def validate(self, frame, value): if value is None: - return None - + raise TypeError if isinstance(value, bytes): return value elif PY3: @@ -178,36 +439,58 @@ class BinaryDataSpec(Spec): return value -class EncodedTextSpec(Spec): - # Okay, seriously. This is private and defined explicitly and - # completely by the ID3 specification. You can't just add - # encodings here however you want. 
- _encodings = ( - ('latin1', b'\x00'), - ('utf16', b'\x00\x00'), - ('utf_16_be', b'\x00\x00'), - ('utf8', b'\x00') - ) +def iter_text_fixups(data, encoding): + """Yields a series of repaired text values for decoding""" - def read(self, frame, data): + yield data + if encoding == Encoding.UTF16BE: + # wrong termination + yield data + b"\x00" + elif encoding == Encoding.UTF16: + # wrong termination + yield data + b"\x00" + # utf-16 is missing BOM, content is usually utf-16-le + yield codecs.BOM_UTF16_LE + data + # both cases combined + yield codecs.BOM_UTF16_LE + data + b"\x00" + + +class EncodedTextSpec(Spec): + + _encodings = { + Encoding.LATIN1: ('latin1', b'\x00'), + Encoding.UTF16: ('utf16', b'\x00\x00'), + Encoding.UTF16BE: ('utf_16_be', b'\x00\x00'), + Encoding.UTF8: ('utf8', b'\x00'), + } + + def __init__(self, name, default=u""): + super(EncodedTextSpec, self).__init__(name, default) + + def read(self, header, frame, data): + enc, term = self._encodings[frame.encoding] + err = None + for data in iter_text_fixups(data, frame.encoding): + try: + value, data = decode_terminated(data, enc, strict=False) + except ValueError as e: + err = e + else: + # Older id3 did not support multiple values, but we still + # read them. To not missinterpret zero padded values with + # a list of empty strings, stop if everything left is zero. 
+ # https://github.com/quodlibet/mutagen/issues/276 + if header.version < header._V24 and not data.strip(b"\x00"): + data = b"" + return value, data + raise SpecError(err) + + def write(self, config, frame, value): enc, term = self._encodings[frame.encoding] try: - # allow missing termination - return decode_terminated(data, enc, strict=False) - except ValueError: - # utf-16 termination with missing BOM, or single NULL - if not data[:len(term)].strip(b"\x00"): - return u"", data[len(term):] - - # utf-16 data with single NULL, see issue 169 - try: - return decode_terminated(data + b"\x00", enc) - except ValueError: - raise ID3JunkFrameError - - def write(self, frame, value): - enc, term = self._encodings[frame.encoding] - return value.encode(enc) + term + return encode_endian(value, enc, le=True) + term + except UnicodeEncodeError as e: + raise SpecError(e) def validate(self, frame, value): return text_type(value) @@ -215,16 +498,16 @@ class EncodedTextSpec(Spec): class MultiSpec(Spec): def __init__(self, name, *specs, **kw): - super(MultiSpec, self).__init__(name) + super(MultiSpec, self).__init__(name, default=kw.get('default')) self.specs = specs self.sep = kw.get('sep') - def read(self, frame, data): + def read(self, header, frame, data): values = [] while data: record = [] for spec in self.specs: - value, data = spec.read(frame, data) + value, data = spec.read(header, frame, data) record.append(value) if len(self.specs) != 1: values.append(record) @@ -232,20 +515,18 @@ class MultiSpec(Spec): values.append(record[0]) return values, data - def write(self, frame, value): + def write(self, config, frame, value): data = [] if len(self.specs) == 1: for v in value: - data.append(self.specs[0].write(frame, v)) + data.append(self.specs[0].write(config, frame, v)) else: for record in value: - for v, s in zip(record, self.specs): - data.append(s.write(frame, v)) + for v, s in izip(record, self.specs): + data.append(s.write(config, frame, v)) return b''.join(data) def 
validate(self, frame, value): - if value is None: - return [] if self.sep and isinstance(value, string_types): value = value.split(self.sep) if isinstance(value, list): @@ -253,14 +534,14 @@ class MultiSpec(Spec): return [self.specs[0].validate(frame, v) for v in value] else: return [ - [s.validate(frame, v) for (v, s) in zip(val, self.specs)] + [s.validate(frame, v) for (v, s) in izip(val, self.specs)] for val in value] raise ValueError('Invalid MultiSpec data: %r' % value) def _validate23(self, frame, value, **kwargs): if len(self.specs) != 1: return [[s._validate23(frame, v, **kwargs) - for (v, s) in zip(val, self.specs)] + for (v, s) in izip(val, self.specs)] for val in value] spec = self.specs[0] @@ -285,21 +566,87 @@ class EncodedNumericPartTextSpec(EncodedTextSpec): pass -class Latin1TextSpec(EncodedTextSpec): - def read(self, frame, data): +class Latin1TextSpec(Spec): + + def __init__(self, name, default=u""): + super(Latin1TextSpec, self).__init__(name, default) + + def read(self, header, frame, data): if b'\x00' in data: data, ret = data.split(b'\x00', 1) else: ret = b'' return data.decode('latin1'), ret - def write(self, data, value): + def write(self, config, data, value): return value.encode('latin1') + b'\x00' def validate(self, frame, value): return text_type(value) +class ID3FramesSpec(Spec): + + handle_nodata = True + + def __init__(self, name, default=[]): + super(ID3FramesSpec, self).__init__(name, default) + + def read(self, header, frame, data): + from ._tags import ID3Tags + + tags = ID3Tags() + return tags, tags._read(header, data) + + def _validate23(self, frame, value, **kwargs): + from ._tags import ID3Tags + + v = ID3Tags() + for frame in value.values(): + v.add(frame._get_v23_frame(**kwargs)) + return v + + def write(self, config, frame, value): + return bytes(value._write(config)) + + def validate(self, frame, value): + from ._tags import ID3Tags + + if isinstance(value, ID3Tags): + return value + + tags = ID3Tags() + for v in value: + 
tags.add(v) + + return tags + + +class Latin1TextListSpec(Spec): + + def __init__(self, name, default=[]): + super(Latin1TextListSpec, self).__init__(name, default) + self._bspec = ByteSpec("entry_count", default=0) + self._lspec = Latin1TextSpec("child_element_id") + + def read(self, header, frame, data): + count, data = self._bspec.read(header, frame, data) + entries = [] + for i in xrange(count): + entry, data = self._lspec.read(header, frame, data) + entries.append(entry) + return entries, data + + def write(self, config, frame, value): + b = self._bspec.write(config, frame, len(value)) + for v in value: + b += self._lspec.write(config, frame, v) + return b + + def validate(self, frame, value): + return [self._lspec.validate(frame, v) for v in value] + + @swap_to_string @total_ordering class ID3TimeStamp(object): @@ -360,7 +707,7 @@ class ID3TimeStamp(object): return repr(self.text) def __eq__(self, other): - return self.text == other.text + return isinstance(other, ID3TimeStamp) and self.text == other.text def __lt__(self, other): return self.text < other.text @@ -372,12 +719,12 @@ class ID3TimeStamp(object): class TimeStampSpec(EncodedTextSpec): - def read(self, frame, data): - value, data = super(TimeStampSpec, self).read(frame, data) + def read(self, header, frame, data): + value, data = super(TimeStampSpec, self).read(header, frame, data) return self.validate(frame, value), data - def write(self, frame, data): - return super(TimeStampSpec, self).write(frame, + def write(self, config, frame, data): + return super(TimeStampSpec, self).write(config, frame, data.text.replace(' ', 'T')) def validate(self, frame, value): @@ -389,32 +736,32 @@ class TimeStampSpec(EncodedTextSpec): class ChannelSpec(ByteSpec): (OTHER, MASTER, FRONTRIGHT, FRONTLEFT, BACKRIGHT, BACKLEFT, FRONTCENTRE, - BACKCENTRE, SUBWOOFER) = range(9) + BACKCENTRE, SUBWOOFER) = xrange(9) class VolumeAdjustmentSpec(Spec): - def read(self, frame, data): + def read(self, header, frame, data): value, = 
unpack('>h', data[0:2]) return value / 512.0, data[2:] - def write(self, frame, value): + def write(self, config, frame, value): number = int(round(value * 512)) # pack only fails in 2.7, do it manually in 2.6 if not -32768 <= number <= 32767: - raise struct.error + raise SpecError("not in range") return pack('>h', number) def validate(self, frame, value): if value is not None: try: - self.write(frame, value) - except struct.error: + self.write(None, frame, value) + except SpecError: raise ValueError("out of range") return value class VolumePeakSpec(Spec): - def read(self, frame, data): + def read(self, header, frame, data): # http://bugs.xmms.org/attachment.cgi?id=113&action=view peak = 0 data_array = bytearray(data) @@ -422,54 +769,57 @@ class VolumePeakSpec(Spec): vol_bytes = min(4, (bits + 7) >> 3) # not enough frame data if vol_bytes + 1 > len(data): - raise ID3JunkFrameError + raise SpecError("not enough frame data") shift = ((8 - (bits & 7)) & 7) + (4 - vol_bytes) * 8 - for i in range(1, vol_bytes + 1): + for i in xrange(1, vol_bytes + 1): peak *= 256 peak += data_array[i] peak *= 2 ** shift return (float(peak) / (2 ** 31 - 1)), data[1 + vol_bytes:] - def write(self, frame, value): + def write(self, config, frame, value): number = int(round(value * 32768)) # pack only fails in 2.7, do it manually in 2.6 if not 0 <= number <= 65535: - raise struct.error + raise SpecError("not in range") # always write as 16 bits for sanity. 
return b"\x10" + pack('>H', number) def validate(self, frame, value): if value is not None: try: - self.write(frame, value) - except struct.error: + self.write(None, frame, value) + except SpecError: raise ValueError("out of range") return value class SynchronizedTextSpec(EncodedTextSpec): - def read(self, frame, data): + def read(self, header, frame, data): texts = [] encoding, term = self._encodings[frame.encoding] while data: try: value, data = decode_terminated(data, encoding) except ValueError: - raise ID3JunkFrameError + raise SpecError("decoding error") if len(data) < 4: - raise ID3JunkFrameError + raise SpecError("not enough data") time, = struct.unpack(">I", data[:4]) texts.append((value, time)) data = data[4:] return texts, b"" - def write(self, frame, value): + def write(self, config, frame, value): data = [] encoding, term = self._encodings[frame.encoding] for text, time in value: - text = text.encode(encoding) + term + try: + text = encode_endian(text, encoding, le=True) + term + except UnicodeEncodeError as e: + raise SpecError(e) data.append(text + struct.pack(">I", time)) return b"".join(data) @@ -478,23 +828,23 @@ class SynchronizedTextSpec(EncodedTextSpec): class KeyEventSpec(Spec): - def read(self, frame, data): + def read(self, header, frame, data): events = [] while len(data) >= 5: events.append(struct.unpack(">bI", data[:5])) data = data[5:] return events, data - def write(self, frame, value): + def write(self, config, frame, value): return b"".join(struct.pack(">bI", *event) for event in value) def validate(self, frame, value): - return value + return list(value) class VolumeAdjustmentsSpec(Spec): # Not to be confused with VolumeAdjustmentSpec. 
- def read(self, frame, data): + def read(self, header, frame, data): adjustments = {} while len(data) >= 4: freq, adj = struct.unpack(">Hh", data[:4]) @@ -505,17 +855,18 @@ class VolumeAdjustmentsSpec(Spec): adjustments = sorted(adjustments.items()) return adjustments, data - def write(self, frame, value): + def write(self, config, frame, value): value.sort() return b"".join(struct.pack(">Hh", int(freq * 2), int(adj * 512)) for (freq, adj) in value) def validate(self, frame, value): - return value + return list(value) class ASPIIndexSpec(Spec): - def read(self, frame, data): + + def read(self, header, frame, data): if frame.b == 16: format = "H" size = 2 @@ -523,21 +874,26 @@ class ASPIIndexSpec(Spec): format = "B" size = 1 else: - warn("invalid bit count in ASPI (%d)" % frame.b, ID3Warning) - return [], data + raise SpecError("invalid bit count in ASPI (%d)" % frame.b) indexes = data[:frame.N * size] data = data[frame.N * size:] - return list(struct.unpack(">" + format * frame.N, indexes)), data + try: + return list(struct.unpack(">" + format * frame.N, indexes)), data + except struct.error as e: + raise SpecError(e) - def write(self, frame, values): + def write(self, config, frame, values): if frame.b == 16: format = "H" elif frame.b == 8: format = "B" else: - raise ValueError("frame.b must be 8 or 16") - return struct.pack(">" + format * frame.N, *values) + raise SpecError("frame.b must be 8 or 16") + try: + return struct.pack(">" + format * frame.N, *values) + except struct.error as e: + raise SpecError(e) def validate(self, frame, values): - return values + return list(values) diff --git a/lib/mutagen/id3/_tags.py b/lib/mutagen/id3/_tags.py new file mode 100755 index 00000000..99845faa --- /dev/null +++ b/lib/mutagen/id3/_tags.py @@ -0,0 +1,638 @@ +# -*- coding: utf-8 -*- +# Copyright 2005 Michael Urman +# Copyright 2016 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public 
License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +import struct + +from mutagen._tags import Tags +from mutagen._util import DictProxy, convert_error, read_full +from mutagen._compat import PY3, text_type, itervalues + +from ._util import BitPaddedInt, unsynch, ID3JunkFrameError, \ + ID3EncryptionUnsupportedError, is_valid_frame_id, error, \ + ID3NoHeaderError, ID3UnsupportedVersionError, ID3SaveConfig +from ._frames import TDRC, APIC, TDOR, TIME, TIPL, TORY, TDAT, Frames_2_2, \ + TextFrame, TYER, Frame, IPLS, Frames + + +class ID3Header(object): + + _V24 = (2, 4, 0) + _V23 = (2, 3, 0) + _V22 = (2, 2, 0) + _V11 = (1, 1) + + f_unsynch = property(lambda s: bool(s._flags & 0x80)) + f_extended = property(lambda s: bool(s._flags & 0x40)) + f_experimental = property(lambda s: bool(s._flags & 0x20)) + f_footer = property(lambda s: bool(s._flags & 0x10)) + + _known_frames = None + + @property + def known_frames(self): + if self._known_frames is not None: + return self._known_frames + elif self.version >= ID3Header._V23: + return Frames + elif self.version >= ID3Header._V22: + return Frames_2_2 + + @convert_error(IOError, error) + def __init__(self, fileobj=None): + """Raises ID3NoHeaderError, ID3UnsupportedVersionError or error""" + + if fileobj is None: + # for testing + self._flags = 0 + return + + fn = getattr(fileobj, "name", "") + data = fileobj.read(10) + if len(data) != 10: + raise ID3NoHeaderError("%s: too small" % fn) + + id3, vmaj, vrev, flags, size = struct.unpack('>3sBBB4s', data) + self._flags = flags + self.size = BitPaddedInt(size) + 10 + self.version = (2, vmaj, vrev) + + if id3 != b'ID3': + raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn) + + if vmaj not in [2, 3, 4]: + raise ID3UnsupportedVersionError("%r ID3v2.%d not supported" + % (fn, vmaj)) + + if not BitPaddedInt.has_valid_padding(size): + raise error("Header size not synchsafe") + + if (self.version 
>= self._V24) and (flags & 0x0f): + raise error( + "%r has invalid flags %#02x" % (fn, flags)) + elif (self._V23 <= self.version < self._V24) and (flags & 0x1f): + raise error( + "%r has invalid flags %#02x" % (fn, flags)) + + if self.f_extended: + extsize_data = read_full(fileobj, 4) + + if PY3: + frame_id = extsize_data.decode("ascii", "replace") + else: + frame_id = extsize_data + + if frame_id in Frames: + # Some tagger sets the extended header flag but + # doesn't write an extended header; in this case, the + # ID3 data follows immediately. Since no extended + # header is going to be long enough to actually match + # a frame, and if it's *not* a frame we're going to be + # completely lost anyway, this seems to be the most + # correct check. + # https://github.com/quodlibet/quodlibet/issues/126 + self._flags ^= 0x40 + extsize = 0 + fileobj.seek(-4, 1) + elif self.version >= self._V24: + # "Where the 'Extended header size' is the size of the whole + # extended header, stored as a 32 bit synchsafe integer." + extsize = BitPaddedInt(extsize_data) - 4 + if not BitPaddedInt.has_valid_padding(extsize_data): + raise error( + "Extended header size not synchsafe") + else: + # "Where the 'Extended header size', currently 6 or 10 bytes, + # excludes itself." + extsize = struct.unpack('>L', extsize_data)[0] + + self._extdata = read_full(fileobj, extsize) + + +def determine_bpi(data, frames, EMPTY=b"\x00" * 10): + """Takes id3v2.4 frame data and determines if ints or bitpaddedints + should be used for parsing. Needed because iTunes used to write + normal ints for frame sizes. 
+ """ + + # count number of tags found as BitPaddedInt and how far past + o = 0 + asbpi = 0 + while o < len(data) - 10: + part = data[o:o + 10] + if part == EMPTY: + bpioff = -((len(data) - o) % 10) + break + name, size, flags = struct.unpack('>4sLH', part) + size = BitPaddedInt(size) + o += 10 + size + if PY3: + try: + name = name.decode("ascii") + except UnicodeDecodeError: + continue + if name in frames: + asbpi += 1 + else: + bpioff = o - len(data) + + # count number of tags found as int and how far past + o = 0 + asint = 0 + while o < len(data) - 10: + part = data[o:o + 10] + if part == EMPTY: + intoff = -((len(data) - o) % 10) + break + name, size, flags = struct.unpack('>4sLH', part) + o += 10 + size + if PY3: + try: + name = name.decode("ascii") + except UnicodeDecodeError: + continue + if name in frames: + asint += 1 + else: + intoff = o - len(data) + + # if more tags as int, or equal and bpi is past and int is not + if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)): + return int + return BitPaddedInt + + +class ID3Tags(DictProxy, Tags): + + __module__ = "mutagen.id3" + + def __init__(self, *args, **kwargs): + self.unknown_frames = [] + self._unknown_v2_version = 4 + super(ID3Tags, self).__init__(*args, **kwargs) + + def _read(self, header, data): + frames, unknown_frames, data = read_frames( + header, data, header.known_frames) + for frame in frames: + self._add(frame, False) + self.unknown_frames = unknown_frames + self._unknown_v2_version = header.version[1] + return data + + def _write(self, config): + # Sort frames by 'importance', then reverse frame size and then frame + # hash to get a stable result + order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"] + + framedata = [ + (f, save_frame(f, config=config)) for f in itervalues(self)] + + def get_prio(frame): + try: + return order.index(frame.FrameID) + except ValueError: + return len(order) + + def sort_key(items): + frame, data = items + return (get_prio(frame), 
len(data), frame.HashKey) + + framedata = [d for (f, d) in sorted(framedata, key=sort_key)] + + # only write unknown frames if they were loaded from the version + # we are saving with. Theoretically we could upgrade frames + # but some frames can be nested like CHAP, so there is a chance + # we create a mixed frame mess. + if self._unknown_v2_version == config.v2_version: + framedata.extend(data for data in self.unknown_frames + if len(data) > 10) + + return bytearray().join(framedata) + + def getall(self, key): + """Return all frames with a given name (the list may be empty). + + Args: + key (text): key for frames to get + + This is best explained by examples:: + + id3.getall('TIT2') == [id3['TIT2']] + id3.getall('TTTT') == [] + id3.getall('TXXX') == [TXXX(desc='woo', text='bar'), + TXXX(desc='baz', text='quuuux'), ...] + + Since this is based on the frame's HashKey, which is + colon-separated, you can use it to do things like + ``getall('COMM:MusicMatch')`` or ``getall('TXXX:QuodLibet:')``. + """ + if key in self: + return [self[key]] + else: + key = key + ":" + return [v for s, v in self.items() if s.startswith(key)] + + def setall(self, key, values): + """Delete frames of the given type and add frames in 'values'. + + Args: + key (text): key for frames to delete + values (List[`Frame`]): frames to add + """ + + self.delall(key) + for tag in values: + self[tag.HashKey] = tag + + def delall(self, key): + """Delete all tags of a given kind; see getall. + + Args: + key (text): key for frames to delete + """ + + if key in self: + del(self[key]) + else: + key = key + ":" + for k in list(self.keys()): + if k.startswith(key): + del(self[k]) + + def pprint(self): + """ + Returns: + text: tags in a human-readable format. + + "Human-readable" is used loosely here. The format is intended + to mirror that used for Vorbis or APEv2 output, e.g. 
+ + ``TIT2=My Title`` + + However, ID3 frames can have multiple keys: + + ``POPM=user@example.org=3 128/255`` + """ + + frames = sorted(Frame.pprint(s) for s in self.values()) + return "\n".join(frames) + + def _add(self, frame, strict): + """Add a frame. + + Args: + frame (Frame): the frame to add + strict (bool): if this should raise in case it can't be added + and frames shouldn't be merged. + """ + + if not isinstance(frame, Frame): + raise TypeError("%r not a Frame instance" % frame) + + orig_frame = frame + frame = frame._upgrade_frame() + if frame is None: + if not strict: + return + raise TypeError( + "Can't upgrade %r frame" % type(orig_frame).__name__) + + hash_key = frame.HashKey + if strict or hash_key not in self: + self[hash_key] = frame + return + + # Try to merge frames, or change the new one. Since changing + # the new one can lead to new conflicts, try until everything is + # either merged or added. + while True: + old_frame = self[hash_key] + new_frame = old_frame._merge_frame(frame) + new_hash = new_frame.HashKey + if new_hash == hash_key: + self[hash_key] = new_frame + break + else: + assert new_frame is frame + if new_hash not in self: + self[new_hash] = new_frame + break + hash_key = new_hash + + def loaded_frame(self, tag): + """Deprecated; use the add method.""" + + self._add(tag, True) + + def add(self, frame): + """Add a frame to the tag.""" + + # add = loaded_frame (and vice versa) break applications that + # expect to be able to override loaded_frame (e.g. Quod Libet), + # as does making loaded_frame call add. + self.loaded_frame(frame) + + def __setitem__(self, key, tag): + if not isinstance(tag, Frame): + raise TypeError("%r not a Frame instance" % tag) + super(ID3Tags, self).__setitem__(key, tag) + + def __update_common(self): + """Updates done by both v23 and v24 update""" + + if "TCON" in self: + # Get rid of "(xx)Foobr" format. 
+ self["TCON"].genres = self["TCON"].genres + + mimes = {"PNG": "image/png", "JPG": "image/jpeg"} + for pic in self.getall("APIC"): + if pic.mime in mimes: + newpic = APIC( + encoding=pic.encoding, mime=mimes[pic.mime], + type=pic.type, desc=pic.desc, data=pic.data) + self.add(newpic) + + def update_to_v24(self): + """Convert older tags into an ID3v2.4 tag. + + This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to + TDRC). If you intend to save tags, you must call this function + at some point; it is called by default when loading the tag. + """ + + self.__update_common() + + # TDAT, TYER, and TIME have been turned into TDRC. + try: + date = text_type(self.get("TYER", "")) + if date.strip(u"\x00"): + self.pop("TYER") + dat = text_type(self.get("TDAT", "")) + if dat.strip("\x00"): + self.pop("TDAT") + date = "%s-%s-%s" % (date, dat[2:], dat[:2]) + time = text_type(self.get("TIME", "")) + if time.strip("\x00"): + self.pop("TIME") + date += "T%s:%s:00" % (time[:2], time[2:]) + if "TDRC" not in self: + self.add(TDRC(encoding=0, text=date)) + except UnicodeDecodeError: + # Old ID3 tags have *lots* of Unicode problems, so if TYER + # is bad, just chuck the frames. + pass + + # TORY can be the first part of a TDOR. + if "TORY" in self: + f = self.pop("TORY") + if "TDOR" not in self: + try: + self.add(TDOR(encoding=0, text=str(f))) + except UnicodeDecodeError: + pass + + # IPLS is now TIPL. + if "IPLS" in self: + f = self.pop("IPLS") + if "TIPL" not in self: + self.add(TIPL(encoding=f.encoding, people=f.people)) + + # These can't be trivially translated to any ID3v2.4 tags, or + # should have been removed already. + for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME"]: + if key in self: + del(self[key]) + + # Recurse into chapters + for f in self.getall("CHAP"): + f.sub_frames.update_to_v24() + for f in self.getall("CTOC"): + f.sub_frames.update_to_v24() + + def update_to_v23(self): + """Convert older (and newer) tags into an ID3v2.3 tag. 
+ + This updates incompatible ID3v2 frames to ID3v2.3 ones. If you + intend to save tags as ID3v2.3, you must call this function + at some point. + + If you want to to go off spec and include some v2.4 frames + in v2.3, remove them before calling this and add them back afterwards. + """ + + self.__update_common() + + # TMCL, TIPL -> TIPL + if "TIPL" in self or "TMCL" in self: + people = [] + if "TIPL" in self: + f = self.pop("TIPL") + people.extend(f.people) + if "TMCL" in self: + f = self.pop("TMCL") + people.extend(f.people) + if "IPLS" not in self: + self.add(IPLS(encoding=f.encoding, people=people)) + + # TDOR -> TORY + if "TDOR" in self: + f = self.pop("TDOR") + if f.text: + d = f.text[0] + if d.year and "TORY" not in self: + self.add(TORY(encoding=f.encoding, text="%04d" % d.year)) + + # TDRC -> TYER, TDAT, TIME + if "TDRC" in self: + f = self.pop("TDRC") + if f.text: + d = f.text[0] + if d.year and "TYER" not in self: + self.add(TYER(encoding=f.encoding, text="%04d" % d.year)) + if d.month and d.day and "TDAT" not in self: + self.add(TDAT(encoding=f.encoding, + text="%02d%02d" % (d.day, d.month))) + if d.hour and d.minute and "TIME" not in self: + self.add(TIME(encoding=f.encoding, + text="%02d%02d" % (d.hour, d.minute))) + + # New frames added in v2.4 + v24_frames = [ + 'ASPI', 'EQU2', 'RVA2', 'SEEK', 'SIGN', 'TDEN', 'TDOR', + 'TDRC', 'TDRL', 'TDTG', 'TIPL', 'TMCL', 'TMOO', 'TPRO', + 'TSOA', 'TSOP', 'TSOT', 'TSST', + ] + + for key in v24_frames: + if key in self: + del(self[key]) + + # Recurse into chapters + for f in self.getall("CHAP"): + f.sub_frames.update_to_v23() + for f in self.getall("CTOC"): + f.sub_frames.update_to_v23() + + def _copy(self): + """Creates a shallow copy of all tags""" + + items = self.items() + subs = {} + for f in (self.getall("CHAP") + self.getall("CTOC")): + subs[f.HashKey] = f.sub_frames._copy() + return (items, subs) + + def _restore(self, value): + """Restores the state copied with _copy()""" + + items, subs = value + 
self.clear() + for key, value in items: + self[key] = value + if key in subs: + value.sub_frames._restore(subs[key]) + + +def save_frame(frame, name=None, config=None): + if config is None: + config = ID3SaveConfig() + + flags = 0 + if isinstance(frame, TextFrame): + if len(str(frame)) == 0: + return b'' + + framedata = frame._writeData(config) + + usize = len(framedata) + if usize > 2048: + # Disabled as this causes iTunes and other programs + # to fail to find these frames, which usually includes + # e.g. APIC. + # framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib') + # flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN + pass + + if config.v2_version == 4: + bits = 7 + elif config.v2_version == 3: + bits = 8 + else: + raise ValueError + + datasize = BitPaddedInt.to_str(len(framedata), width=4, bits=bits) + + if name is not None: + assert isinstance(name, bytes) + frame_name = name + else: + frame_name = type(frame).__name__ + if PY3: + frame_name = frame_name.encode("ascii") + + header = struct.pack('>4s4sH', frame_name, datasize, flags) + return header + framedata + + +def read_frames(id3, data, frames): + """Does not error out""" + + assert id3.version >= ID3Header._V22 + + result = [] + unsupported_frames = [] + + if id3.version < ID3Header._V24 and id3.f_unsynch: + try: + data = unsynch.decode(data) + except ValueError: + pass + + if id3.version >= ID3Header._V23: + if id3.version < ID3Header._V24: + bpi = int + else: + bpi = determine_bpi(data, frames) + + while data: + header = data[:10] + try: + name, size, flags = struct.unpack('>4sLH', header) + except struct.error: + break # not enough header + if name.strip(b'\x00') == b'': + break + + size = bpi(size) + framedata = data[10:10 + size] + data = data[10 + size:] + if size == 0: + continue # drop empty frames + + if PY3: + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue + + try: + # someone writes 2.3 frames with 2.2 names + if name[-1] == "\x00": + tag = 
Frames_2_2[name[:-1]] + name = tag.__base__.__name__ + + tag = frames[name] + except KeyError: + if is_valid_frame_id(name): + unsupported_frames.append(header + framedata) + else: + try: + result.append(tag._fromData(id3, flags, framedata)) + except NotImplementedError: + unsupported_frames.append(header + framedata) + except ID3JunkFrameError: + pass + elif id3.version >= ID3Header._V22: + while data: + header = data[0:6] + try: + name, size = struct.unpack('>3s3s', header) + except struct.error: + break # not enough header + size, = struct.unpack('>L', b'\x00' + size) + if name.strip(b'\x00') == b'': + break + + framedata = data[6:6 + size] + data = data[6 + size:] + if size == 0: + continue # drop empty frames + + if PY3: + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue + + try: + tag = frames[name] + except KeyError: + if is_valid_frame_id(name): + unsupported_frames.append(header + framedata) + else: + try: + result.append( + tag._fromData(id3, 0, framedata)) + except (ID3EncryptionUnsupportedError, + NotImplementedError): + unsupported_frames.append(header + framedata) + except ID3JunkFrameError: + pass + + return result, unsupported_frames, data diff --git a/lib/mutagen/id3/_util.py b/lib/mutagen/id3/_util.py old mode 100644 new mode 100755 index 363953e7..93bb264e --- a/lib/mutagen/id3/_util.py +++ b/lib/mutagen/id3/_util.py @@ -1,15 +1,27 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2005 Michael Urman # 2013 Christoph Reiter # 2014 Ben Ockmore # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
-from .._compat import long_, integer_types -from .._util import MutagenError +from mutagen._compat import long_, integer_types, PY3 +from mutagen._util import MutagenError + + +def is_valid_frame_id(frame_id): + return frame_id.isalnum() and frame_id.isupper() + + +class ID3SaveConfig(object): + + def __init__(self, v2_version=4, v23_separator=None): + assert v2_version in (3, 4) + self.v2_version = v2_version + self.v23_separator = v23_separator class error(MutagenError): @@ -20,18 +32,6 @@ class ID3NoHeaderError(error, ValueError): pass -class ID3BadUnsynchData(error, ValueError): - pass - - -class ID3BadCompressedData(error, ValueError): - pass - - -class ID3TagError(error, ValueError): - pass - - class ID3UnsupportedVersionError(error, NotImplementedError): pass @@ -40,11 +40,7 @@ class ID3EncryptionUnsupportedError(error, NotImplementedError): pass -class ID3JunkFrameError(error, ValueError): - pass - - -class ID3Warning(error, UserWarning): +class ID3JunkFrameError(error): pass @@ -138,6 +134,8 @@ class BitPaddedInt(int, _BitPaddedMixin): shift = 0 if isinstance(value, integer_types): + if value < 0: + raise ValueError while value: numeric_value += (value & mask) << shift value >>= 8 @@ -160,6 +158,24 @@ class BitPaddedInt(int, _BitPaddedMixin): self.bigendian = bigendian return self +if PY3: + BitPaddedLong = BitPaddedInt +else: + class BitPaddedLong(long_, _BitPaddedMixin): + pass -class BitPaddedLong(long_, _BitPaddedMixin): - pass + +class ID3BadUnsynchData(error, ValueError): + """Deprecated""" + + +class ID3BadCompressedData(error, ValueError): + """Deprecated""" + + +class ID3TagError(error, ValueError): + """Deprecated""" + + +class ID3Warning(error, UserWarning): + """Deprecated""" diff --git a/lib/mutagen/m4a.py b/lib/mutagen/m4a.py old mode 100644 new mode 100755 index b8fa9784..c7583f8e --- a/lib/mutagen/m4a.py +++ b/lib/mutagen/m4a.py @@ -1,43 +1,28 @@ +# -*- coding: utf-8 -*- # Copyright 2006 Joe Wreschnig # # This program is free software; you 
can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. -import sys - -if sys.version_info[0] != 2: - raise ImportError("No longer available with Python 3, use mutagen.mp4") - -"""Read and write MPEG-4 audio files with iTunes metadata. - -This module will read MPEG-4 audio information and metadata, -as found in Apple's M4A (aka MP4, M4B, M4P) files. - -There is no official specification for this format. The source code -for TagLib, FAAD, and various MPEG specifications at -http://developer.apple.com/documentation/QuickTime/QTFF/, -http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt, -and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all -consulted. - -This module does not support 64 bit atom sizes, and so will not -work on metadata over 4GB. +""" +since 1.9: mutagen.m4a is deprecated; use mutagen.mp4 instead. +since 1.31: mutagen.m4a will no longer work; any operation that could fail + will fail now. 
""" -import struct -import sys +import warnings -from cStringIO import StringIO +from mutagen import FileType, Tags, StreamInfo +from ._util import DictProxy, MutagenError, loadfile -from ._compat import reraise -from mutagen import FileType, Metadata, StreamInfo -from mutagen._constants import GENRES -from mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy, \ - MutagenError +warnings.warn( + "mutagen.m4a is deprecated; use mutagen.mp4 instead.", + DeprecationWarning) -class error(IOError, MutagenError): +class error(MutagenError): pass @@ -49,497 +34,70 @@ class M4AStreamInfoError(error): pass -class M4AMetadataValueError(ValueError, M4AMetadataError): +class M4AMetadataValueError(error): pass -import warnings -warnings.warn( - "mutagen.m4a is deprecated; use mutagen.mp4 instead.", DeprecationWarning) - - -# This is not an exhaustive list of container atoms, but just the -# ones this module needs to peek inside. -_CONTAINERS = ["moov", "udta", "trak", "mdia", "meta", "ilst", - "stbl", "minf", "stsd"] -_SKIP_SIZE = {"meta": 4} - __all__ = ['M4A', 'Open', 'delete', 'M4ACover'] -class M4ACover(str): - """A cover artwork. +class M4ACover(bytes): - Attributes: - imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG) - """ FORMAT_JPEG = 0x0D FORMAT_PNG = 0x0E def __new__(cls, data, imageformat=None): - self = str.__new__(cls, data) + self = bytes.__new__(cls, data) if imageformat is None: imageformat = M4ACover.FORMAT_JPEG self.imageformat = imageformat - try: - self.format - except AttributeError: - self.format = imageformat return self -class Atom(object): - """An individual atom. - - Attributes: - children -- list child atoms (or None for non-container atoms) - length -- length of this atom, including length and name - name -- four byte name of the atom, as a str - offset -- location in the constructor-given fileobj of this atom - - This structure should only be used internally by Mutagen. 
- """ - - children = None - - def __init__(self, fileobj): - self.offset = fileobj.tell() - self.length, self.name = struct.unpack(">I4s", fileobj.read(8)) - if self.length == 1: - raise error("64 bit atom sizes are not supported") - elif self.length < 8: - return - - if self.name in _CONTAINERS: - self.children = [] - fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1) - while fileobj.tell() < self.offset + self.length: - self.children.append(Atom(fileobj)) - else: - fileobj.seek(self.offset + self.length, 0) - - @staticmethod - def render(name, data): - """Render raw atom data.""" - # this raises OverflowError if Py_ssize_t can't handle the atom data - size = len(data) + 8 - if size <= 0xFFFFFFFF: - return struct.pack(">I4s", size, name) + data - else: - return struct.pack(">I4sQ", 1, name, size + 8) + data - - def __getitem__(self, remaining): - """Look up a child atom, potentially recursively. - - e.g. atom['udta', 'meta'] => - """ - if not remaining: - return self - elif self.children is None: - raise KeyError("%r is not a container" % self.name) - for child in self.children: - if child.name == remaining[0]: - return child[remaining[1:]] - else: - raise KeyError("%r not found" % remaining[0]) - - def __repr__(self): - klass = self.__class__.__name__ - if self.children is None: - return "<%s name=%r length=%r offset=%r>" % ( - klass, self.name, self.length, self.offset) - else: - children = "\n".join([" " + line for child in self.children - for line in repr(child).splitlines()]) - return "<%s name=%r length=%r offset=%r\n%s>" % ( - klass, self.name, self.length, self.offset, children) - - -class Atoms(object): - """Root atoms in a given file. - - Attributes: - atoms -- a list of top-level atoms as Atom objects - - This structure should only be used internally by Mutagen. 
- """ - def __init__(self, fileobj): - self.atoms = [] - fileobj.seek(0, 2) - end = fileobj.tell() - fileobj.seek(0) - while fileobj.tell() < end: - self.atoms.append(Atom(fileobj)) - - def path(self, *names): - """Look up and return the complete path of an atom. - - For example, atoms.path('moov', 'udta', 'meta') will return a - list of three atoms, corresponding to the moov, udta, and meta - atoms. - """ - path = [self] - for name in names: - path.append(path[-1][name, ]) - return path[1:] - - def __getitem__(self, names): - """Look up a child atom. - - 'names' may be a list of atoms (['moov', 'udta']) or a string - specifying the complete path ('moov.udta'). - """ - if isinstance(names, basestring): - names = names.split(".") - for child in self.atoms: - if child.name == names[0]: - return child[names[1:]] - else: - raise KeyError("%s not found" % names[0]) - - def __repr__(self): - return "\n".join([repr(child) for child in self.atoms]) - - -class M4ATags(DictProxy, Metadata): - """Dictionary containing Apple iTunes metadata list key/values. - - Keys are four byte identifiers, except for freeform ('----') - keys. Values are usually unicode strings, but some atoms have a - special structure: - cpil -- boolean - trkn, disk -- tuple of 16 bit ints (current, total) - tmpo -- 16 bit int - covr -- list of M4ACover objects (which are tagged strs) - gnre -- not supported. Use '\\xa9gen' instead. - - The freeform '----' frames use a key in the format '----:mean:name' - where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique - identifier for this frame. The value is a str, but is probably - text that can be decoded as UTF-8. - - M4A tag data cannot exist outside of the structure of an M4A file, - so this class should not be manually instantiated. - - Unknown non-text tags are removed. 
- """ +class M4ATags(DictProxy, Tags): def load(self, atoms, fileobj): - try: - ilst = atoms["moov.udta.meta.ilst"] - except KeyError as key: - raise M4AMetadataError(key) - for atom in ilst.children: - fileobj.seek(atom.offset + 8) - data = fileobj.read(atom.length - 8) - parse = self.__atoms.get(atom.name, (M4ATags.__parse_text,))[0] - parse(self, atom, data) - - @staticmethod - def __key_sort(item1, item2): - (key1, v1) = item1 - (key2, v2) = item2 - # iTunes always writes the tags in order of "relevance", try - # to copy it as closely as possible. - order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb", - "\xa9gen", "gnre", "trkn", "disk", - "\xa9day", "cpil", "tmpo", "\xa9too", - "----", "covr", "\xa9lyr"] - order = dict(zip(order, range(len(order)))) - last = len(order) - # If there's no key-based way to distinguish, order by length. - # If there's still no way, go by string comparison on the - # values, so we at least have something determinstic. - return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or - cmp(len(v1), len(v2)) or cmp(v1, v2)) + raise error("deprecated") def save(self, filename): - """Save the metadata to the given filename.""" - values = [] - items = self.items() - items.sort(self.__key_sort) - for key, value in items: - render = self.__atoms.get( - key[:4], (None, M4ATags.__render_text))[1] - values.append(render(self, key, value)) - data = Atom.render("ilst", "".join(values)) - - # Find the old atoms. - fileobj = open(filename, "rb+") - try: - atoms = Atoms(fileobj) - - moov = atoms["moov"] - - if moov != atoms.atoms[-1]: - # "Free" the old moov block. Something in the mdat - # block is not happy when its offset changes and it - # won't play back. So, rather than try to figure that - # out, just move the moov atom to the end of the file. 
- offset = self.__move_moov(fileobj, moov) - else: - offset = 0 - - try: - path = atoms.path("moov", "udta", "meta", "ilst") - except KeyError: - self.__save_new(fileobj, atoms, data, offset) - else: - self.__save_existing(fileobj, atoms, path, data, offset) - finally: - fileobj.close() - - def __move_moov(self, fileobj, moov): - fileobj.seek(moov.offset) - data = fileobj.read(moov.length) - fileobj.seek(moov.offset) - free = Atom.render("free", "\x00" * (moov.length - 8)) - fileobj.write(free) - fileobj.seek(0, 2) - # Figure out how far we have to shift all our successive - # seek calls, relative to what the atoms say. - old_end = fileobj.tell() - fileobj.write(data) - return old_end - moov.offset - - def __save_new(self, fileobj, atoms, ilst, offset): - hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9) - meta = Atom.render("meta", "\x00\x00\x00\x00" + hdlr + ilst) - moov, udta = atoms.path("moov", "udta") - insert_bytes(fileobj, len(meta), udta.offset + offset + 8) - fileobj.seek(udta.offset + offset + 8) - fileobj.write(meta) - self.__update_parents(fileobj, [moov, udta], len(meta), offset) - - def __save_existing(self, fileobj, atoms, path, data, offset): - # Replace the old ilst atom. - ilst = path.pop() - delta = len(data) - ilst.length - fileobj.seek(ilst.offset + offset) - if delta > 0: - insert_bytes(fileobj, delta, ilst.offset + offset) - elif delta < 0: - delete_bytes(fileobj, -delta, ilst.offset + offset) - fileobj.seek(ilst.offset + offset) - fileobj.write(data) - self.__update_parents(fileobj, path, delta, offset) - - def __update_parents(self, fileobj, path, delta, offset): - # Update all parent atoms with the new size. 
- for atom in path: - fileobj.seek(atom.offset + offset) - size = cdata.uint_be(fileobj.read(4)) + delta - fileobj.seek(atom.offset + offset) - fileobj.write(cdata.to_uint_be(size)) - - def __render_data(self, key, flags, data): - data = struct.pack(">2I", flags, 0) + data - return Atom.render(key, Atom.render("data", data)) - - def __parse_freeform(self, atom, data): - try: - fileobj = StringIO(data) - mean_length = cdata.uint_be(fileobj.read(4)) - # skip over 8 bytes of atom name, flags - mean = fileobj.read(mean_length - 4)[8:] - name_length = cdata.uint_be(fileobj.read(4)) - name = fileobj.read(name_length - 4)[8:] - value_length = cdata.uint_be(fileobj.read(4)) - # Name, flags, and reserved bytes - value = fileobj.read(value_length - 4)[12:] - except struct.error: - # Some ---- atoms have no data atom, I have no clue why - # they actually end up in the file. - pass - else: - self["%s:%s:%s" % (atom.name, mean, name)] = value - - def __render_freeform(self, key, value): - dummy, mean, name = key.split(":", 2) - mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean - name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name - value = struct.pack(">I4s2I", len(value) + 16, "data", 0x1, 0) + value - final = mean + name + value - return Atom.render("----", final) - - def __parse_pair(self, atom, data): - self[atom.name] = struct.unpack(">2H", data[18:22]) - - def __render_pair(self, key, value): - track, total = value - if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: - data = struct.pack(">4H", 0, track, total, 0) - return self.__render_data(key, 0, data) - else: - raise M4AMetadataValueError("invalid numeric pair %r" % (value,)) - - def __render_pair_no_trailing(self, key, value): - track, total = value - if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: - data = struct.pack(">3H", 0, track, total) - return self.__render_data(key, 0, data) - else: - raise M4AMetadataValueError("invalid numeric pair %r" % (value,)) - - def __parse_genre(self, atom, 
data): - # Translate to a freeform genre. - genre = cdata.short_be(data[16:18]) - if "\xa9gen" not in self: - try: - self["\xa9gen"] = GENRES[genre - 1] - except IndexError: - pass - - def __parse_tempo(self, atom, data): - self[atom.name] = cdata.short_be(data[16:18]) - - def __render_tempo(self, key, value): - if 0 <= value < 1 << 16: - return self.__render_data(key, 0x15, cdata.to_ushort_be(value)) - else: - raise M4AMetadataValueError("invalid short integer %r" % value) - - def __parse_compilation(self, atom, data): - try: - self[atom.name] = bool(ord(data[16:17])) - except TypeError: - self[atom.name] = False - - def __render_compilation(self, key, value): - return self.__render_data(key, 0x15, chr(bool(value))) - - def __parse_cover(self, atom, data): - length, name, imageformat = struct.unpack(">I4sI", data[:12]) - if name != "data": - raise M4AMetadataError( - "unexpected atom %r inside 'covr'" % name) - if imageformat not in (M4ACover.FORMAT_JPEG, M4ACover.FORMAT_PNG): - imageformat = M4ACover.FORMAT_JPEG - self[atom.name] = M4ACover(data[16:length], imageformat) - - def __render_cover(self, key, value): - try: - imageformat = value.imageformat - except AttributeError: - imageformat = M4ACover.FORMAT_JPEG - data = Atom.render("data", struct.pack(">2I", imageformat, 0) + value) - return Atom.render(key, data) - - def __parse_text(self, atom, data): - flags = cdata.uint_be(data[8:12]) - if flags == 1: - self[atom.name] = data[16:].decode('utf-8', 'replace') - - def __render_text(self, key, value): - return self.__render_data(key, 0x1, value.encode('utf-8')) + raise error("deprecated") def delete(self, filename): - self.clear() - self.save(filename) - - __atoms = { - "----": (__parse_freeform, __render_freeform), - "trkn": (__parse_pair, __render_pair), - "disk": (__parse_pair, __render_pair_no_trailing), - "gnre": (__parse_genre, None), - "tmpo": (__parse_tempo, __render_tempo), - "cpil": (__parse_compilation, __render_compilation), - "covr": (__parse_cover, 
__render_cover), - } + raise error("deprecated") def pprint(self): - values = [] - for key, value in self.iteritems(): - key = key.decode('latin1') - try: - values.append("%s=%s" % (key, value)) - except UnicodeDecodeError: - values.append("%s=[%d bytes of data]" % (key, len(value))) - return "\n".join(values) + return u"" class M4AInfo(StreamInfo): - """MPEG-4 stream information. - - Attributes: - bitrate -- bitrate in bits per second, as an int - length -- file length in seconds, as a float - """ bitrate = 0 def __init__(self, atoms, fileobj): - hdlr = atoms["moov.trak.mdia.hdlr"] - fileobj.seek(hdlr.offset) - if "soun" not in fileobj.read(hdlr.length): - raise M4AStreamInfoError("track has no audio data") - - mdhd = atoms["moov.trak.mdia.mdhd"] - fileobj.seek(mdhd.offset) - data = fileobj.read(mdhd.length) - if ord(data[8]) == 0: - offset = 20 - fmt = ">2I" - else: - offset = 28 - fmt = ">IQ" - end = offset + struct.calcsize(fmt) - unit, length = struct.unpack(fmt, data[offset:end]) - self.length = float(length) / unit - - try: - atom = atoms["moov.trak.mdia.minf.stbl.stsd"] - fileobj.seek(atom.offset) - data = fileobj.read(atom.length) - self.bitrate = cdata.uint_be(data[-17:-13]) - except (ValueError, KeyError): - # Bitrate values are optional. - pass + raise error("deprecated") def pprint(self): - return "MPEG-4 audio, %.2f seconds, %d bps" % ( - self.length, self.bitrate) + return u"" class M4A(FileType): - """An MPEG-4 audio file, probably containing AAC. - - If more than one track is present in the file, the first is used. - Only audio ('soun') tracks will be read. 
- """ _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] - def load(self, filename): - self.filename = filename - fileobj = open(filename, "rb") - try: - atoms = Atoms(fileobj) - try: - self.info = M4AInfo(atoms, fileobj) - except StandardError as err: - reraise(M4AStreamInfoError, err, sys.exc_info()[2]) - try: - self.tags = M4ATags(atoms, fileobj) - except M4AMetadataError: - self.tags = None - except StandardError as err: - reraise(M4AMetadataError, err, sys.exc_info()[2]) - finally: - fileobj.close() + @loadfile() + def load(self, filething): + raise error("deprecated") def add_tags(self): self.tags = M4ATags() @staticmethod def score(filename, fileobj, header): - return ("ftyp" in header) + ("mp4" in header) + return 0 Open = M4A def delete(filename): - """Remove tags from a file.""" - - M4A(filename).delete() + raise error("deprecated") diff --git a/lib/mutagen/monkeysaudio.py b/lib/mutagen/monkeysaudio.py old mode 100644 new mode 100755 index 536d7b18..ae7df766 --- a/lib/mutagen/monkeysaudio.py +++ b/lib/mutagen/monkeysaudio.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Monkey's Audio streams with APEv2 tags. @@ -21,7 +21,7 @@ import struct from ._compat import endswith from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete -from mutagen._util import cdata +from mutagen._util import cdata, convert_error class MonkeysAudioHeaderError(error): @@ -29,18 +29,22 @@ class MonkeysAudioHeaderError(error): class MonkeysAudioInfo(StreamInfo): - """Monkey's Audio stream information. 
+ """MonkeysAudioInfo() + + Monkey's Audio stream information. Attributes: - - * channels -- number of audio channels - * length -- file length in seconds, as a float - * sample_rate -- audio sampling rate in Hz - * bits_per_sample -- bits per sample - * version -- Monkey's Audio stream version, as a float (eg: 3.99) + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bits_per_sample (`int`): bits per sample + version (`float`): Monkey's Audio stream version, as a float (eg: 3.99) """ + @convert_error(IOError, MonkeysAudioHeaderError) def __init__(self, fileobj): + """Raises MonkeysAudioHeaderError""" + header = fileobj.read(76) if len(header) != 76 or not header.startswith(b"MAC "): raise MonkeysAudioHeaderError("not a Monkey's Audio file") @@ -70,11 +74,20 @@ class MonkeysAudioInfo(StreamInfo): self.length = float(total_blocks) / self.sample_rate def pprint(self): - return "Monkey's Audio %.2f, %.2f seconds, %d Hz" % ( + return u"Monkey's Audio %.2f, %.2f seconds, %d Hz" % ( self.version, self.length, self.sample_rate) class MonkeysAudio(APEv2File): + """MonkeysAudio(filething) + + Arguments: + filething (filething) + + Attributes: + info (`MonkeysAudioInfo`) + """ + _Info = MonkeysAudioInfo _mimes = ["audio/ape", "audio/x-ape"] diff --git a/lib/mutagen/mp3.py b/lib/mutagen/mp3.py deleted file mode 100644 index 535c6b4d..00000000 --- a/lib/mutagen/mp3.py +++ /dev/null @@ -1,289 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright (C) 2006 Joe Wreschnig -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. 
- -"""MPEG audio stream information and tags.""" - -import os -import struct - -from ._compat import endswith -from mutagen import StreamInfo -from mutagen._util import MutagenError -from mutagen.id3 import ID3FileType, BitPaddedInt, delete - -__all__ = ["MP3", "Open", "delete", "MP3"] - - -class error(RuntimeError, MutagenError): - pass - - -class HeaderNotFoundError(error, IOError): - pass - - -class InvalidMPEGHeader(error, IOError): - pass - - -# Mode values. -STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4) - - -class MPEGInfo(StreamInfo): - """MPEG audio stream information - - Parse information about an MPEG audio file. This also reads the - Xing VBR header format. - - This code was implemented based on the format documentation at - http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm. - - Useful attributes: - - * length -- audio length, in seconds - * bitrate -- audio bitrate, in bits per second - * sketchy -- if true, the file may not be valid MPEG audio - - Useless attributes: - - * version -- MPEG version (1, 2, 2.5) - * layer -- 1, 2, or 3 - * mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3) - * protected -- whether or not the file is "protected" - * padding -- whether or not audio frames are padded - * sample_rate -- audio sample rate, in Hz - """ - - # Map (version, layer) tuples to bitrates. - __BITRATE = { - (1, 1): [0, 32, 64, 96, 128, 160, 192, 224, - 256, 288, 320, 352, 384, 416, 448], - (1, 2): [0, 32, 48, 56, 64, 80, 96, 112, 128, - 160, 192, 224, 256, 320, 384], - (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112, - 128, 160, 192, 224, 256, 320], - (2, 1): [0, 32, 48, 56, 64, 80, 96, 112, 128, - 144, 160, 176, 192, 224, 256], - (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64, - 80, 96, 112, 128, 144, 160], - } - - __BITRATE[(2, 3)] = __BITRATE[(2, 2)] - for i in range(1, 4): - __BITRATE[(2.5, i)] = __BITRATE[(2, i)] - - # Map version to sample rates. 
- __RATES = { - 1: [44100, 48000, 32000], - 2: [22050, 24000, 16000], - 2.5: [11025, 12000, 8000] - } - - sketchy = False - - def __init__(self, fileobj, offset=None): - """Parse MPEG stream information from a file-like object. - - If an offset argument is given, it is used to start looking - for stream information and Xing headers; otherwise, ID3v2 tags - will be skipped automatically. A correct offset can make - loading files significantly faster. - """ - - try: - size = os.path.getsize(fileobj.name) - except (IOError, OSError, AttributeError): - fileobj.seek(0, 2) - size = fileobj.tell() - - # If we don't get an offset, try to skip an ID3v2 tag. - if offset is None: - fileobj.seek(0, 0) - idata = fileobj.read(10) - try: - id3, insize = struct.unpack('>3sxxx4s', idata) - except struct.error: - id3, insize = '', 0 - insize = BitPaddedInt(insize) - if id3 == b'ID3' and insize > 0: - offset = insize + 10 - else: - offset = 0 - - # Try to find two valid headers (meaning, very likely MPEG data) - # at the given offset, 30% through the file, 60% through the file, - # and 90% through the file. - for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]: - try: - self.__try(fileobj, int(i), size - offset) - except error: - pass - else: - break - # If we can't find any two consecutive frames, try to find just - # one frame back at the original offset given. - else: - self.__try(fileobj, offset, size - offset, False) - self.sketchy = True - - def __try(self, fileobj, offset, real_size, check_second=True): - # This is going to be one really long function; bear with it, - # because there's not really a sane point to cut it up. - fileobj.seek(offset, 0) - - # We "know" we have an MPEG file if we find two frames that look like - # valid MPEG data. If we can't find them in 32k of reads, something - # is horribly wrong (the longest frame can only be about 4k). This - # is assuming the offset didn't lie. 
- data = fileobj.read(32768) - - frame_1 = data.find(b"\xff") - while 0 <= frame_1 <= (len(data) - 4): - frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0] - if ((frame_data >> 16) & 0xE0) != 0xE0: - frame_1 = data.find(b"\xff", frame_1 + 2) - else: - version = (frame_data >> 19) & 0x3 - layer = (frame_data >> 17) & 0x3 - protection = (frame_data >> 16) & 0x1 - bitrate = (frame_data >> 12) & 0xF - sample_rate = (frame_data >> 10) & 0x3 - padding = (frame_data >> 9) & 0x1 - # private = (frame_data >> 8) & 0x1 - self.mode = (frame_data >> 6) & 0x3 - # mode_extension = (frame_data >> 4) & 0x3 - # copyright = (frame_data >> 3) & 0x1 - # original = (frame_data >> 2) & 0x1 - # emphasis = (frame_data >> 0) & 0x3 - if (version == 1 or layer == 0 or sample_rate == 0x3 or - bitrate == 0 or bitrate == 0xF): - frame_1 = data.find(b"\xff", frame_1 + 2) - else: - break - else: - raise HeaderNotFoundError("can't sync to an MPEG frame") - - # There is a serious problem here, which is that many flags - # in an MPEG header are backwards. 
- self.version = [2.5, None, 2, 1][version] - self.layer = 4 - layer - self.protected = not protection - self.padding = bool(padding) - - self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate] - self.bitrate *= 1000 - self.sample_rate = self.__RATES[self.version][sample_rate] - - if self.layer == 1: - frame_length = ( - (12 * self.bitrate // self.sample_rate) + padding) * 4 - frame_size = 384 - elif self.version >= 2 and self.layer == 3: - frame_length = (72 * self.bitrate // self.sample_rate) + padding - frame_size = 576 - else: - frame_length = (144 * self.bitrate // self.sample_rate) + padding - frame_size = 1152 - - if check_second: - possible = int(frame_1 + frame_length) - if possible > len(data) + 4: - raise HeaderNotFoundError("can't sync to second MPEG frame") - try: - frame_data = struct.unpack( - ">H", data[possible:possible + 2])[0] - except struct.error: - raise HeaderNotFoundError("can't sync to second MPEG frame") - if (frame_data & 0xFFE0) != 0xFFE0: - raise HeaderNotFoundError("can't sync to second MPEG frame") - - self.length = 8 * real_size / float(self.bitrate) - - # Try to find/parse the Xing header, which trumps the above length - # and bitrate calculation. - fileobj.seek(offset, 0) - data = fileobj.read(32768) - try: - xing = data[:-4].index(b"Xing") - except ValueError: - # Try to find/parse the VBRI header, which trumps the above length - # calculation. - try: - vbri = data[:-24].index(b"VBRI") - except ValueError: - pass - else: - # If a VBRI header was found, this is definitely MPEG audio. - self.sketchy = False - vbri_version = struct.unpack('>H', data[vbri + 4:vbri + 6])[0] - if vbri_version == 1: - frame_count = struct.unpack( - '>I', data[vbri + 14:vbri + 18])[0] - samples = float(frame_size * frame_count) - self.length = (samples / self.sample_rate) or self.length - else: - # If a Xing header was found, this is definitely MPEG audio. 
- self.sketchy = False - flags = struct.unpack('>I', data[xing + 4:xing + 8])[0] - if flags & 0x1: - frame_count = struct.unpack('>I', data[xing + 8:xing + 12])[0] - samples = float(frame_size * frame_count) - self.length = (samples / self.sample_rate) or self.length - if flags & 0x2: - bitrate_data = struct.unpack( - '>I', data[xing + 12:xing + 16])[0] - self.bitrate = int((bitrate_data * 8) // self.length) - - def pprint(self): - s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % ( - self.version, self.layer, self.bitrate, self.sample_rate, - self.length) - if self.sketchy: - s += " (sketchy)" - return s - - -class MP3(ID3FileType): - """An MPEG audio (usually MPEG-1 Layer 3) file. - - :ivar info: :class:`MPEGInfo` - :ivar tags: :class:`ID3 ` - """ - - _Info = MPEGInfo - - _mimes = ["audio/mpeg", "audio/mpg", "audio/x-mpeg"] - - @property - def mime(self): - l = self.info.layer - return ["audio/mp%d" % l, "audio/x-mp%d" % l] + super(MP3, self).mime - - @staticmethod - def score(filename, fileobj, header_data): - filename = filename.lower() - - return (header_data.startswith(b"ID3") * 2 + - endswith(filename, b".mp3") + - endswith(filename, b".mp2") + endswith(filename, b".mpg") + - endswith(filename, b".mpeg")) - - -Open = MP3 - - -class EasyMP3(MP3): - """Like MP3, but uses EasyID3 for tags. - - :ivar info: :class:`MPEGInfo` - :ivar tags: :class:`EasyID3 ` - """ - - from mutagen.easyid3 import EasyID3 as ID3 - ID3 = ID3 diff --git a/lib/mutagen/mp3/__init__.py b/lib/mutagen/mp3/__init__.py new file mode 100755 index 00000000..9e26c6dc --- /dev/null +++ b/lib/mutagen/mp3/__init__.py @@ -0,0 +1,476 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +"""MPEG audio stream information and tags.""" + +import struct + +from mutagen import StreamInfo +from mutagen._util import MutagenError, enum, BitReader, BitReaderError, \ + convert_error +from mutagen._compat import endswith, xrange +from mutagen.id3 import ID3FileType, delete +from mutagen.id3._util import BitPaddedInt + +from ._util import XingHeader, XingHeaderError, VBRIHeader, VBRIHeaderError + + +__all__ = ["MP3", "Open", "delete", "MP3"] + + +class error(MutagenError): + pass + + +class HeaderNotFoundError(error): + pass + + +class InvalidMPEGHeader(error): + pass + + +@enum +class BitrateMode(object): + + UNKNOWN = 0 + """Probably a CBR file, but not sure""" + + CBR = 1 + """Constant Bitrate""" + + VBR = 2 + """Variable Bitrate""" + + ABR = 3 + """Average Bitrate (a variant of VBR)""" + + +def _guess_xing_bitrate_mode(xing): + + if xing.lame_header: + lame = xing.lame_header + if lame.vbr_method in (1, 8): + return BitrateMode.CBR + elif lame.vbr_method in (2, 9): + return BitrateMode.ABR + elif lame.vbr_method in (3, 4, 5, 6): + return BitrateMode.VBR + # everything else undefined, continue guessing + + # info tags get only written by lame for cbr files + if xing.is_info: + return BitrateMode.CBR + + # older lame and non-lame with some variant of vbr + if xing.vbr_scale != -1 or xing.lame_version_desc: + return BitrateMode.VBR + + return BitrateMode.UNKNOWN + + +# Mode values. +STEREO, JOINTSTEREO, DUALCHANNEL, MONO = xrange(4) + + +class MPEGFrame(object): + + # Map (version, layer) tuples to bitrates. 
+ __BITRATE = { + (1, 1): [0, 32, 64, 96, 128, 160, 192, 224, + 256, 288, 320, 352, 384, 416, 448], + (1, 2): [0, 32, 48, 56, 64, 80, 96, 112, 128, + 160, 192, 224, 256, 320, 384], + (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112, + 128, 160, 192, 224, 256, 320], + (2, 1): [0, 32, 48, 56, 64, 80, 96, 112, 128, + 144, 160, 176, 192, 224, 256], + (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64, + 80, 96, 112, 128, 144, 160], + } + + __BITRATE[(2, 3)] = __BITRATE[(2, 2)] + for i in xrange(1, 4): + __BITRATE[(2.5, i)] = __BITRATE[(2, i)] + + # Map version to sample rates. + __RATES = { + 1: [44100, 48000, 32000], + 2: [22050, 24000, 16000], + 2.5: [11025, 12000, 8000] + } + + sketchy = False + + def __init__(self, fileobj): + """Raises HeaderNotFoundError""" + + self.frame_offset = fileobj.tell() + + r = BitReader(fileobj) + try: + if r.bits(11) != 0x7ff: + raise HeaderNotFoundError("invalid sync") + version = r.bits(2) + layer = r.bits(2) + protection = r.bits(1) + bitrate = r.bits(4) + sample_rate = r.bits(2) + padding = r.bits(1) + r.skip(1) # private + self.mode = r.bits(2) + r.skip(6) + except BitReaderError: + raise HeaderNotFoundError("truncated header") + + assert r.get_position() == 32 and r.is_aligned() + + # try to be strict here to redice the chance of a false positive + if version == 1 or layer == 0 or sample_rate == 0x3 or \ + bitrate == 0xf or bitrate == 0: + raise HeaderNotFoundError("invalid header") + + self.channels = 1 if self.mode == MONO else 2 + + self.version = [2.5, None, 2, 1][version] + self.layer = 4 - layer + self.protected = not protection + self.padding = bool(padding) + + self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate] + self.bitrate *= 1000 + self.sample_rate = self.__RATES[self.version][sample_rate] + + if self.layer == 1: + frame_size = 384 + slot = 4 + elif self.version >= 2 and self.layer == 3: + frame_size = 576 + slot = 1 + else: + frame_size = 1152 + slot = 1 + + frame_length = ( + ((frame_size // 8 * self.bitrate) // 
self.sample_rate) + + padding) * slot + + self.sketchy = True + + # Try to find/parse the Xing header, which trumps the above length + # and bitrate calculation. + if self.layer == 3: + self._parse_vbr_header(fileobj, self.frame_offset, frame_size) + + fileobj.seek(self.frame_offset + frame_length, 0) + + def _parse_vbr_header(self, fileobj, frame_offset, frame_size): + """Does not raise""" + + # Xing + xing_offset = XingHeader.get_offset(self) + fileobj.seek(frame_offset + xing_offset, 0) + try: + xing = XingHeader(fileobj) + except XingHeaderError: + pass + else: + lame = xing.lame_header + self.sketchy = False + self.bitrate_mode = _guess_xing_bitrate_mode(xing) + self.encoder_settings = xing.get_encoder_settings() + if xing.frames != -1: + samples = frame_size * xing.frames + if lame is not None: + samples -= lame.encoder_delay_start + samples -= lame.encoder_padding_end + if samples < 0: + # older lame versions wrote bogus delay/padding for short + # files with low bitrate + samples = 0 + self.length = float(samples) / self.sample_rate + if xing.bytes != -1 and self.length: + self.bitrate = int((xing.bytes * 8) / self.length) + if xing.lame_version_desc: + self.encoder_info = u"LAME %s" % xing.lame_version_desc + if lame is not None: + self.track_gain = lame.track_gain_adjustment + self.track_peak = lame.track_peak + self.album_gain = lame.album_gain_adjustment + return + + # VBRI + vbri_offset = VBRIHeader.get_offset(self) + fileobj.seek(frame_offset + vbri_offset, 0) + try: + vbri = VBRIHeader(fileobj) + except VBRIHeaderError: + pass + else: + self.bitrate_mode = BitrateMode.VBR + self.encoder_info = u"FhG" + self.sketchy = False + self.length = float(frame_size * vbri.frames) / self.sample_rate + if self.length: + self.bitrate = int((vbri.bytes * 8) / self.length) + + +def skip_id3(fileobj): + """Might raise IOError""" + + # WMP writes multiple id3s, so skip as many as we find + while True: + idata = fileobj.read(10) + try: + id3, insize = 
struct.unpack('>3sxxx4s', idata) + except struct.error: + id3, insize = b'', 0 + insize = BitPaddedInt(insize) + if id3 == b'ID3' and insize > 0: + fileobj.seek(insize, 1) + else: + fileobj.seek(-len(idata), 1) + break + + +def iter_sync(fileobj, max_read): + """Iterate over a fileobj and yields on each mpeg sync. + + When yielding the fileobj offset is right before the sync and can be + changed between iterations without affecting the iteration process. + + Might raise IOError. + """ + + read = 0 + size = 2 + last_byte = b"" + is_second = lambda b: ord(b) & 0xe0 == 0xe0 + + while read < max_read: + data_offset = fileobj.tell() + new_data = fileobj.read(min(max_read - read, size)) + if not new_data: + return + read += len(new_data) + + if last_byte == b"\xff" and is_second(new_data[0:1]): + fileobj.seek(data_offset - 1, 0) + yield + + size *= 2 + last_byte = new_data[-1:] + + find_offset = 0 + while True: + index = new_data.find(b"\xff", find_offset) + # if not found or the last byte -> read more + if index == -1 or index == len(new_data) - 1: + break + + if is_second(new_data[index + 1:index + 2]): + fileobj.seek(data_offset + index, 0) + yield + find_offset = index + 1 + + fileobj.seek(data_offset + len(new_data), 0) + + +class MPEGInfo(StreamInfo): + """MPEGInfo() + + MPEG audio stream information + + Parse information about an MPEG audio file. This also reads the + Xing VBR header format. + + This code was implemented based on the format documentation at + http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm. + + Useful attributes: + + Attributes: + length (`float`): audio length, in seconds + channels (`int`): number of audio channels + bitrate (`int`): audio bitrate, in bits per second + sample_rate (`int`) audio sample rate, in Hz + encoder_info (`mutagen.text`): a string containing encoder name and + possibly version. In case a lame tag is present this will start + with ``"LAME "``, if unknown it is empty, otherwise the + text format is undefined. 
+ encoder_settings (`mutagen.text`): a string containing a guess about + the settings used for encoding. The format is undefined and + depends on the encoder. + bitrate_mode (`BitrateMode`): a :class:`BitrateMode` + track_gain (`float` or `None`): replaygain track gain (89db) or None + track_peak (`float` or `None`): replaygain track peak or None + album_gain (`float` or `None`): replaygain album gain (89db) or None + + Useless attributes: + + Attributes: + version (`float`): MPEG version (1, 2, 2.5) + layer (`int`): 1, 2, or 3 + mode (`int`): One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3) + protected (`bool`): whether or not the file is "protected" + padding (`bool`) whether or not audio frames are padded + sketchy (`bool`): if true, the file may not be valid MPEG audio + """ + + sketchy = False + encoder_info = u"" + encoder_settings = u"" + bitrate_mode = BitrateMode.UNKNOWN + track_gain = track_peak = album_gain = album_peak = None + + @convert_error(IOError, error) + def __init__(self, fileobj, offset=None): + """Parse MPEG stream information from a file-like object. + + If an offset argument is given, it is used to start looking + for stream information and Xing headers; otherwise, ID3v2 tags + will be skipped automatically. A correct offset can make + loading files significantly faster. 
+ + Raises HeaderNotFoundError, error + """ + + if offset is None: + fileobj.seek(0, 0) + else: + fileobj.seek(offset, 0) + + # skip anyway, because wmp stacks multiple id3 tags + skip_id3(fileobj) + + # find a sync in the first 1024K, give up after some invalid syncs + max_read = 1024 * 1024 + max_syncs = 1000 + enough_frames = 4 + min_frames = 2 + + self.sketchy = True + frames = [] + first_frame = None + + for _ in iter_sync(fileobj, max_read): + max_syncs -= 1 + if max_syncs <= 0: + break + + for _ in xrange(enough_frames): + try: + frame = MPEGFrame(fileobj) + except HeaderNotFoundError: + break + frames.append(frame) + if not frame.sketchy: + break + + # if we have min frames, save it in case this is all we get + if len(frames) >= min_frames and first_frame is None: + first_frame = frames[0] + + # if the last frame was a non-sketchy one (has a valid vbr header) + # we use that + if frames and not frames[-1].sketchy: + first_frame = frames[-1] + self.sketchy = False + break + + # if we have enough valid frames, use the first + if len(frames) >= enough_frames: + first_frame = frames[0] + self.sketchy = False + break + + # otherwise start over with the next sync + del frames[:] + + if first_frame is None: + raise HeaderNotFoundError("can't sync to MPEG frame") + + assert first_frame + + self.length = -1 + sketchy = self.sketchy + self.__dict__.update(first_frame.__dict__) + self.sketchy = sketchy + + # no length, estimate based on file size + if self.length == -1: + fileobj.seek(0, 2) + content_size = fileobj.tell() - first_frame.frame_offset + self.length = 8 * content_size / float(self.bitrate) + + def pprint(self): + info = str(self.bitrate_mode).split(".", 1)[-1] + if self.bitrate_mode == BitrateMode.UNKNOWN: + info = u"CBR?" 
+ if self.encoder_info: + info += ", %s" % self.encoder_info + if self.encoder_settings: + info += ", %s" % self.encoder_settings + s = u"MPEG %s layer %d, %d bps (%s), %s Hz, %d chn, %.2f seconds" % ( + self.version, self.layer, self.bitrate, info, + self.sample_rate, self.channels, self.length) + if self.sketchy: + s += u" (sketchy)" + return s + + +class MP3(ID3FileType): + """MP3(filething) + + An MPEG audio (usually MPEG-1 Layer 3) file. + + Arguments: + filething (filething) + + Attributes: + info (`MPEGInfo`) + tags (`mutagen.id3.ID3`) + """ + + _Info = MPEGInfo + + _mimes = ["audio/mpeg", "audio/mpg", "audio/x-mpeg"] + + @property + def mime(self): + l = self.info.layer + return ["audio/mp%d" % l, "audio/x-mp%d" % l] + super(MP3, self).mime + + @staticmethod + def score(filename, fileobj, header_data): + filename = filename.lower() + + return (header_data.startswith(b"ID3") * 2 + + endswith(filename, b".mp3") + + endswith(filename, b".mp2") + endswith(filename, b".mpg") + + endswith(filename, b".mpeg")) + + +Open = MP3 + + +class EasyMP3(MP3): + """EasyMP3(filething) + + Like MP3, but uses EasyID3 for tags. + + Arguments: + filething (filething) + + Attributes: + info (`MPEGInfo`) + tags (`mutagen.easyid3.EasyID3`) + """ + + from mutagen.easyid3 import EasyID3 as ID3 + ID3 = ID3 diff --git a/lib/mutagen/mp3/_util.py b/lib/mutagen/mp3/_util.py new file mode 100755 index 00000000..40b28775 --- /dev/null +++ b/lib/mutagen/mp3/_util.py @@ -0,0 +1,524 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +""" +http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header +http://wiki.hydrogenaud.io/index.php?title=MP3 +""" + +from functools import partial + +from mutagen._util import cdata, BitReader +from mutagen._compat import xrange, iterbytes, cBytesIO + + +class LAMEError(Exception): + pass + + +class LAMEHeader(object): + """http://gabriel.mp3-tech.org/mp3infotag.html""" + + vbr_method = 0 + """0: unknown, 1: CBR, 2: ABR, 3/4/5: VBR, others: see the docs""" + + lowpass_filter = 0 + """lowpass filter value in Hz. 0 means unknown""" + + quality = -1 + """Encoding quality: 0..9""" + + vbr_quality = -1 + """VBR quality: 0..9""" + + track_peak = None + """Peak signal amplitude as float. None if unknown.""" + + track_gain_origin = 0 + """see the docs""" + + track_gain_adjustment = None + """Track gain adjustment as float (for 89db replay gain) or None""" + + album_gain_origin = 0 + """see the docs""" + + album_gain_adjustment = None + """Album gain adjustment as float (for 89db replay gain) or None""" + + encoding_flags = 0 + """see docs""" + + ath_type = -1 + """see docs""" + + bitrate = -1 + """Bitrate in kbps. For VBR the minimum bitrate, for anything else + (CBR, ABR, ..) the target bitrate. + """ + + encoder_delay_start = 0 + """Encoder delay in samples""" + + encoder_padding_end = 0 + """Padding in samples added at the end""" + + source_sample_frequency_enum = -1 + """see docs""" + + unwise_setting_used = False + """see docs""" + + stereo_mode = 0 + """see docs""" + + noise_shaping = 0 + """see docs""" + + mp3_gain = 0 + """Applied MP3 gain -127..127. 
Factor is 2 ** (mp3_gain / 4)""" + + surround_info = 0 + """see docs""" + + preset_used = 0 + """lame preset""" + + music_length = 0 + """Length in bytes excluding any ID3 tags""" + + music_crc = -1 + """CRC16 of the data specified by music_length""" + + header_crc = -1 + """CRC16 of this header and everything before (not checked)""" + + def __init__(self, xing, fileobj): + """Raises LAMEError if parsing fails""" + + payload = fileobj.read(27) + if len(payload) != 27: + raise LAMEError("Not enough data") + + # extended lame header + r = BitReader(cBytesIO(payload)) + revision = r.bits(4) + if revision != 0: + raise LAMEError("unsupported header revision %d" % revision) + + self.vbr_method = r.bits(4) + self.lowpass_filter = r.bits(8) * 100 + + # these have a different meaning for lame; expose them again here + self.quality = (100 - xing.vbr_scale) % 10 + self.vbr_quality = (100 - xing.vbr_scale) // 10 + + track_peak_data = r.bytes(4) + if track_peak_data == b"\x00\x00\x00\x00": + self.track_peak = None + else: + # see PutLameVBR() in LAME's VbrTag.c + self.track_peak = ( + cdata.uint32_be(track_peak_data) - 0.5) / 2 ** 23 + track_gain_type = r.bits(3) + self.track_gain_origin = r.bits(3) + sign = r.bits(1) + gain_adj = r.bits(9) / 10.0 + if sign: + gain_adj *= -1 + if track_gain_type == 1: + self.track_gain_adjustment = gain_adj + else: + self.track_gain_adjustment = None + assert r.is_aligned() + + album_gain_type = r.bits(3) + self.album_gain_origin = r.bits(3) + sign = r.bits(1) + album_gain_adj = r.bits(9) / 10.0 + if album_gain_type == 2: + self.album_gain_adjustment = album_gain_adj + else: + self.album_gain_adjustment = None + + self.encoding_flags = r.bits(4) + self.ath_type = r.bits(4) + + self.bitrate = r.bits(8) + + self.encoder_delay_start = r.bits(12) + self.encoder_padding_end = r.bits(12) + + self.source_sample_frequency_enum = r.bits(2) + self.unwise_setting_used = r.bits(1) + self.stereo_mode = r.bits(3) + self.noise_shaping = r.bits(2) + + sign = 
r.bits(1) + mp3_gain = r.bits(7) + if sign: + mp3_gain *= -1 + self.mp3_gain = mp3_gain + + r.skip(2) + self.surround_info = r.bits(3) + self.preset_used = r.bits(11) + self.music_length = r.bits(32) + self.music_crc = r.bits(16) + + self.header_crc = r.bits(16) + assert r.is_aligned() + + def guess_settings(self, major, minor): + """Gives a guess about the encoder settings used. Returns an empty + string if unknown. + + The guess is mostly correct in case the file was encoded with + the default options (-V --preset --alt-preset --abr -b etc) and no + other fancy options. + + Args: + major (int) + minor (int) + Returns: + text + """ + + version = major, minor + + if self.vbr_method == 2: + if version in ((3, 90), (3, 91), (3, 92)) and self.encoding_flags: + if self.bitrate < 255: + return u"--alt-preset %d" % self.bitrate + else: + return u"--alt-preset %d+" % self.bitrate + if self.preset_used != 0: + return u"--preset %d" % self.preset_used + elif self.bitrate < 255: + return u"--abr %d" % self.bitrate + else: + return u"--abr %d+" % self.bitrate + elif self.vbr_method == 1: + if self.preset_used == 0: + if self.bitrate < 255: + return u"-b %d" % self.bitrate + else: + return u"-b 255+" + elif self.preset_used == 1003: + return u"--preset insane" + return u"-b %d" % self.preset_used + elif version in ((3, 90), (3, 91), (3, 92)): + preset_key = (self.vbr_quality, self.quality, self.vbr_method, + self.lowpass_filter, self.ath_type) + if preset_key == (1, 2, 4, 19500, 3): + return u"--preset r3mix" + if preset_key == (2, 2, 3, 19000, 4): + return u"--alt-preset standard" + if preset_key == (2, 2, 3, 19500, 2): + return u"--alt-preset extreme" + + if self.vbr_method == 3: + return u"-V %s" % self.vbr_quality + elif self.vbr_method in (4, 5): + return u"-V %s --vbr-new" % self.vbr_quality + elif version in ((3, 93), (3, 94), (3, 95), (3, 96), (3, 97)): + if self.preset_used == 1001: + return u"--preset standard" + elif self.preset_used == 1002: + return u"--preset 
extreme" + elif self.preset_used == 1004: + return u"--preset fast standard" + elif self.preset_used == 1005: + return u"--preset fast extreme" + elif self.preset_used == 1006: + return u"--preset medium" + elif self.preset_used == 1007: + return u"--preset fast medium" + + if self.vbr_method == 3: + return u"-V %s" % self.vbr_quality + elif self.vbr_method in (4, 5): + return u"-V %s --vbr-new" % self.vbr_quality + elif version == (3, 98): + if self.vbr_method == 3: + return u"-V %s --vbr-old" % self.vbr_quality + elif self.vbr_method in (4, 5): + return u"-V %s" % self.vbr_quality + elif version >= (3, 99): + if self.vbr_method == 3: + return u"-V %s --vbr-old" % self.vbr_quality + elif self.vbr_method in (4, 5): + p = self.vbr_quality + adjust_key = (p, self.bitrate, self.lowpass_filter) + # https://sourceforge.net/p/lame/bugs/455/ + p = { + (5, 32, 0): 7, + (5, 8, 0): 8, + (6, 8, 0): 9, + }.get(adjust_key, p) + return u"-V %s" % p + + return u"" + + @classmethod + def parse_version(cls, fileobj): + """Returns a version string and True if a LAMEHeader follows. + The passed file object will be positioned right before the + lame header if True. + + Raises LAMEError if there is no lame version info. + """ + + # http://wiki.hydrogenaud.io/index.php?title=LAME_version_string + + data = fileobj.read(20) + if len(data) != 20: + raise LAMEError("Not a lame header") + if not data.startswith((b"LAME", b"L3.99")): + raise LAMEError("Not a lame header") + + data = data.lstrip(b"EMAL") + major, data = data[0:1], data[1:].lstrip(b".") + minor = b"" + for c in iterbytes(data): + if not c.isdigit(): + break + minor += c + data = data[len(minor):] + + try: + major = int(major.decode("ascii")) + minor = int(minor.decode("ascii")) + except ValueError: + raise LAMEError + + # the extended header was added sometimes in the 3.90 cycle + # e.g. "LAME3.90 (alpha)" should still stop here. 
+ # (I have seen such a file) + if (major, minor) < (3, 90) or ( + (major, minor) == (3, 90) and data[-11:-10] == b"("): + flag = data.strip(b"\x00").rstrip().decode("ascii") + return (major, minor), u"%d.%d%s" % (major, minor, flag), False + + if len(data) < 11: + raise LAMEError("Invalid version: too long") + + flag = data[:-11].rstrip(b"\x00") + + flag_string = u"" + patch = u"" + if flag == b"a": + flag_string = u" (alpha)" + elif flag == b"b": + flag_string = u" (beta)" + elif flag == b"r": + patch = u".1+" + elif flag == b" ": + if (major, minor) > (3, 96): + patch = u".0" + else: + patch = u".0+" + elif flag == b"" or flag == b".": + patch = u".0+" + else: + flag_string = u" (?)" + + # extended header, seek back to 9 bytes for the caller + fileobj.seek(-11, 1) + + return (major, minor), \ + u"%d.%d%s%s" % (major, minor, patch, flag_string), True + + +class XingHeaderError(Exception): + pass + + +class XingHeaderFlags(object): + FRAMES = 0x1 + BYTES = 0x2 + TOC = 0x4 + VBR_SCALE = 0x8 + + +class XingHeader(object): + + frames = -1 + """Number of frames, -1 if unknown""" + + bytes = -1 + """Number of bytes, -1 if unknown""" + + toc = [] + """List of 100 file offsets in percent encoded as 0-255. E.g. entry + 50 contains the file offset in percent at 50% play time. + Empty if unknown. + """ + + vbr_scale = -1 + """VBR quality indicator 0-100. -1 if unknown""" + + lame_header = None + """A LAMEHeader instance or None""" + + lame_version = (0, 0) + """The LAME version as two element tuple (major, minor)""" + + lame_version_desc = u"" + """The version of the LAME encoder e.g. '3.99.0'. Empty if unknown""" + + is_info = False + """If the header started with 'Info' and not 'Xing'""" + + def __init__(self, fileobj): + """Parses the Xing header or raises XingHeaderError. + + The file position after this returns is undefined. 
+ """ + + data = fileobj.read(8) + if len(data) != 8 or data[:4] not in (b"Xing", b"Info"): + raise XingHeaderError("Not a Xing header") + + self.is_info = (data[:4] == b"Info") + + flags = cdata.uint32_be_from(data, 4)[0] + + if flags & XingHeaderFlags.FRAMES: + data = fileobj.read(4) + if len(data) != 4: + raise XingHeaderError("Xing header truncated") + self.frames = cdata.uint32_be(data) + + if flags & XingHeaderFlags.BYTES: + data = fileobj.read(4) + if len(data) != 4: + raise XingHeaderError("Xing header truncated") + self.bytes = cdata.uint32_be(data) + + if flags & XingHeaderFlags.TOC: + data = fileobj.read(100) + if len(data) != 100: + raise XingHeaderError("Xing header truncated") + self.toc = list(bytearray(data)) + + if flags & XingHeaderFlags.VBR_SCALE: + data = fileobj.read(4) + if len(data) != 4: + raise XingHeaderError("Xing header truncated") + self.vbr_scale = cdata.uint32_be(data) + + try: + self.lame_version, self.lame_version_desc, has_header = \ + LAMEHeader.parse_version(fileobj) + if has_header: + self.lame_header = LAMEHeader(self, fileobj) + except LAMEError: + pass + + def get_encoder_settings(self): + """Returns the guessed encoder settings""" + + if self.lame_header is None: + return u"" + return self.lame_header.guess_settings(*self.lame_version) + + @classmethod + def get_offset(cls, info): + """Calculate the offset to the Xing header from the start of the + MPEG header including sync based on the MPEG header's content. 
+ """ + + assert info.layer == 3 + + if info.version == 1: + if info.mode != 3: + return 36 + else: + return 21 + else: + if info.mode != 3: + return 21 + else: + return 13 + + +class VBRIHeaderError(Exception): + pass + + +class VBRIHeader(object): + + version = 0 + """VBRI header version""" + + quality = 0 + """Quality indicator""" + + bytes = 0 + """Number of bytes""" + + frames = 0 + """Number of frames""" + + toc_scale_factor = 0 + """Scale factor of TOC entries""" + + toc_frames = 0 + """Number of frames per table entry""" + + toc = [] + """TOC""" + + def __init__(self, fileobj): + """Reads the VBRI header or raises VBRIHeaderError. + + The file position is undefined after this returns + """ + + data = fileobj.read(26) + if len(data) != 26 or not data.startswith(b"VBRI"): + raise VBRIHeaderError("Not a VBRI header") + + offset = 4 + self.version, offset = cdata.uint16_be_from(data, offset) + if self.version != 1: + raise VBRIHeaderError( + "Unsupported header version: %r" % self.version) + + offset += 2 # float16.. 
can't do + self.quality, offset = cdata.uint16_be_from(data, offset) + self.bytes, offset = cdata.uint32_be_from(data, offset) + self.frames, offset = cdata.uint32_be_from(data, offset) + + toc_num_entries, offset = cdata.uint16_be_from(data, offset) + self.toc_scale_factor, offset = cdata.uint16_be_from(data, offset) + toc_entry_size, offset = cdata.uint16_be_from(data, offset) + self.toc_frames, offset = cdata.uint16_be_from(data, offset) + toc_size = toc_entry_size * toc_num_entries + toc_data = fileobj.read(toc_size) + if len(toc_data) != toc_size: + raise VBRIHeaderError("VBRI header truncated") + + self.toc = [] + if toc_entry_size == 2: + unpack = partial(cdata.uint16_be_from, toc_data) + elif toc_entry_size == 4: + unpack = partial(cdata.uint32_be_from, toc_data) + else: + raise VBRIHeaderError("Invalid TOC entry size") + + self.toc = [unpack(i)[0] for i in xrange(0, toc_size, toc_entry_size)] + + @classmethod + def get_offset(cls, info): + """Offset in bytes from the start of the MPEG header including sync""" + + assert info.layer == 3 + + return 36 diff --git a/lib/mutagen/mp4/__init__.py b/lib/mutagen/mp4/__init__.py old mode 100644 new mode 100755 index b60ffcb9..d37474af --- a/lib/mutagen/mp4/__init__.py +++ b/lib/mutagen/mp4/__init__.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write MPEG-4 audio files with iTunes metadata. @@ -26,18 +26,18 @@ were all consulted. 
import struct import sys -from mutagen import FileType, Metadata, StreamInfo +from mutagen import FileType, Tags, StreamInfo, PaddingInfo from mutagen._constants import GENRES -from mutagen._util import (cdata, insert_bytes, DictProxy, MutagenError, - hashable, enum) +from mutagen._util import cdata, insert_bytes, DictProxy, MutagenError, \ + hashable, enum, get_size, resize_bytes, loadfile, convert_error from mutagen._compat import (reraise, PY2, string_types, text_type, chr_, - iteritems, PY3, cBytesIO) + iteritems, PY3, cBytesIO, izip, xrange) from ._atom import Atoms, Atom, AtomError from ._util import parse_full_atom from ._as_entry import AudioSampleEntry, ASEntryError -class error(IOError, MutagenError): +class error(MutagenError): pass @@ -49,6 +49,10 @@ class MP4StreamInfoError(error): pass +class MP4NoTrackError(MP4StreamInfoError): + pass + + class MP4MetadataValueError(ValueError, MP4MetadataError): pass @@ -58,7 +62,7 @@ __all__ = ['MP4', 'Open', 'delete', 'MP4Cover', 'MP4FreeForm', 'AtomDataType'] @enum class AtomDataType(object): - """Enum for `dataformat` attribute of MP4FreeForm. + """Enum for ``dataformat`` attribute of MP4FreeForm. .. versionadded:: 1.25 """ @@ -132,8 +136,8 @@ class MP4Cover(bytes): """A cover artwork. Attributes: - - * imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG) + imageformat (`AtomDataType`): format of the image + (either FORMAT_JPEG or FORMAT_PNG) """ FORMAT_JPEG = AtomDataType.JPEG @@ -149,15 +153,10 @@ class MP4Cover(bytes): def __eq__(self, other): if not isinstance(other, MP4Cover): - return NotImplemented + return bytes(self) == other - if not bytes.__eq__(self, other): - return False - - if self.imageformat != other.imageformat: - return False - - return True + return (bytes(self) == bytes(other) and + self.imageformat == other.imageformat) def __ne__(self, other): return not self.__eq__(other) @@ -173,8 +172,7 @@ class MP4FreeForm(bytes): """A freeform value. 
Attributes: - - * dataformat -- format of the data (see AtomDataType) + dataformat (`AtomDataType`): format of the data (see AtomDataType) """ FORMAT_DATA = AtomDataType.IMPLICIT # deprecated @@ -191,18 +189,11 @@ class MP4FreeForm(bytes): def __eq__(self, other): if not isinstance(other, MP4FreeForm): - return NotImplemented + return bytes(self) == other - if not bytes.__eq__(self, other): - return False - - if self.dataformat != other.dataformat: - return False - - if self.version != other.version: - return False - - return True + return (bytes(self) == bytes(other) and + self.dataformat == other.dataformat and + self.version == other.version) def __ne__(self, other): return not self.__eq__(other) @@ -213,7 +204,6 @@ class MP4FreeForm(bytes): AtomDataType(self.dataformat)) - def _name2key(name): if PY2: return name @@ -226,8 +216,48 @@ def _key2name(key): return key.encode("latin-1") -class MP4Tags(DictProxy, Metadata): - r"""Dictionary containing Apple iTunes metadata list key/values. +def _find_padding(atom_path): + # Check for padding "free" atom + # XXX: we only use them if they are adjacent to ilst, and only one. + # and there also is a top level free atom which we could use maybe..? + + meta, ilst = atom_path[-2:] + assert meta.name == b"meta" and ilst.name == b"ilst" + index = meta.children.index(ilst) + try: + prev = meta.children[index - 1] + if prev.name == b"free": + return prev + except IndexError: + pass + + try: + next_ = meta.children[index + 1] + if next_.name == b"free": + return next_ + except IndexError: + pass + + +def _item_sort_key(key, value): + # iTunes always writes the tags in order of "relevance", try + # to copy it as closely as possible. 
+ order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb", + "\xa9gen", "gnre", "trkn", "disk", + "\xa9day", "cpil", "pgap", "pcst", "tmpo", + "\xa9too", "----", "covr", "\xa9lyr"] + order = dict(izip(order, xrange(len(order)))) + last = len(order) + # If there's no key-based way to distinguish, order by length. + # If there's still no way, go by string comparison on the + # values, so we at least have something determinstic. + return (order.get(key[:4], last), len(repr(value)), repr(value)) + + +class MP4Tags(DictProxy, Tags): + r"""MP4Tags() + + Dictionary containing Apple iTunes metadata list key/values. Keys are four byte identifiers, except for freeform ('----') keys. Values are usually unicode strings, but some atoms have a @@ -272,9 +302,21 @@ class MP4Tags(DictProxy, Metadata): * 'trkn' -- track number, total tracks * 'disk' -- disc number, total discs + Integer values: + + * 'tmpo' -- tempo/BPM + * '\\xa9mvc' -- Movement Count + * '\\xa9mvi' -- Movement Index + * 'shwm' -- work/movement + * 'stik' -- Media Kind + * 'rtng' -- Content Rating + * 'tves' -- TV Episode + * 'tvsn' -- TV Season + * 'plID', 'cnID', 'geID', 'atID', 'sfID', 'cmID', 'akID' -- Various iTunes + Internal IDs + Others: - * 'tmpo' -- tempo/BPM, 16 bit int * 'covr' -- cover artwork, list of MP4Cover objects (which are tagged strs) * 'gnre' -- ID3v1 genre. Not supported, use '\\xa9gen' instead. 
@@ -294,13 +336,20 @@ class MP4Tags(DictProxy, Metadata): def __init__(self, *args, **kwargs): self._failed_atoms = {} - super(MP4Tags, self).__init__(*args, **kwargs) + super(MP4Tags, self).__init__() + if args or kwargs: + self.load(*args, **kwargs) def load(self, atoms, fileobj): try: - ilst = atoms[b"moov.udta.meta.ilst"] + path = atoms.path(b"moov", b"udta", b"meta", b"ilst") except KeyError as key: raise MP4MetadataError(key) + + free = _find_padding(path) + self._padding = free.datalength if free is not None else 0 + + ilst = path[-1] for atom in ilst.children: ok, data = atom.read(fileobj) if not ok: @@ -321,133 +370,137 @@ class MP4Tags(DictProxy, Metadata): def __setitem__(self, key, value): if not isinstance(key, str): raise TypeError("key has to be str") + self._render(key, value) super(MP4Tags, self).__setitem__(key, value) @classmethod def _can_load(cls, atoms): return b"moov.udta.meta.ilst" in atoms - @staticmethod - def __key_sort(item): - (key, v) = item - # iTunes always writes the tags in order of "relevance", try - # to copy it as closely as possible. - order = [b"\xa9nam", b"\xa9ART", b"\xa9wrt", b"\xa9alb", - b"\xa9gen", b"gnre", b"trkn", b"disk", - b"\xa9day", b"cpil", b"pgap", b"pcst", b"tmpo", - b"\xa9too", b"----", b"covr", b"\xa9lyr"] - order = dict(zip(order, range(len(order)))) - last = len(order) - # If there's no key-based way to distinguish, order by length. - # If there's still no way, go by string comparison on the - # values, so we at least have something determinstic. 
- return (order.get(key[:4], last), len(repr(v)), repr(v)) + def _render(self, key, value): + atom_name = _key2name(key)[:4] + if atom_name in self.__atoms: + render_func = self.__atoms[atom_name][1] + render_args = self.__atoms[atom_name][2:] + else: + render_func = type(self).__render_text + render_args = [] - def save(self, filename): - """Save the metadata to the given filename.""" + return render_func(self, key, value, *render_args) + + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, padding=None): values = [] - items = sorted(self.items(), key=self.__key_sort) + items = sorted(self.items(), key=lambda kv: _item_sort_key(*kv)) for key, value in items: - atom_name = _key2name(key)[:4] - if atom_name in self.__atoms: - render_func = self.__atoms[atom_name][1] - else: - render_func = type(self).__render_text - try: - values.append(render_func(self, key, value)) + values.append(self._render(key, value)) except (TypeError, ValueError) as s: reraise(MP4MetadataValueError, s, sys.exc_info()[2]) - for atom_name, failed in iteritems(self._failed_atoms): + for key, failed in iteritems(self._failed_atoms): # don't write atoms back if we have added a new one with # the same name, this excludes freeform which can have # multiple atoms with the same key (most parsers seem to be able # to handle that) - if atom_name in self: - assert atom_name != b"----" + if key in self: + assert _key2name(key) != b"----" continue for data in failed: - values.append(Atom.render(_key2name(atom_name), data)) + values.append(Atom.render(_key2name(key), data)) data = Atom.render(b"ilst", b"".join(values)) # Find the old atoms. 
- with open(filename, "rb+") as fileobj: - try: - atoms = Atoms(fileobj) - except AtomError as err: - reraise(error, err, sys.exc_info()[2]) + try: + atoms = Atoms(filething.fileobj) + except AtomError as err: + reraise(error, err, sys.exc_info()[2]) - try: - path = atoms.path(b"moov", b"udta", b"meta", b"ilst") - except KeyError: - self.__save_new(fileobj, atoms, data) - else: - self.__save_existing(fileobj, atoms, path, data) + self.__save(filething.fileobj, atoms, data, padding) - def __pad_ilst(self, data, length=None): - if length is None: - length = ((len(data) + 1023) & ~1023) - len(data) - return Atom.render(b"free", b"\x00" * length) + def __save(self, fileobj, atoms, data, padding): + try: + path = atoms.path(b"moov", b"udta", b"meta", b"ilst") + except KeyError: + self.__save_new(fileobj, atoms, data, padding) + else: + self.__save_existing(fileobj, atoms, path, data, padding) - def __save_new(self, fileobj, atoms, ilst): + def __save_new(self, fileobj, atoms, ilst_data, padding_func): hdlr = Atom.render(b"hdlr", b"\x00" * 8 + b"mdirappl" + b"\x00" * 9) - meta = Atom.render( - b"meta", b"\x00\x00\x00\x00" + hdlr + ilst + self.__pad_ilst(ilst)) + meta_data = b"\x00\x00\x00\x00" + hdlr + ilst_data + try: path = atoms.path(b"moov", b"udta") except KeyError: - # moov.udta not found -- create one path = atoms.path(b"moov") - meta = Atom.render(b"udta", meta) - offset = path[-1].offset + 8 - insert_bytes(fileobj, len(meta), offset) - fileobj.seek(offset) - fileobj.write(meta) - self.__update_parents(fileobj, path, len(meta)) - self.__update_offsets(fileobj, atoms, len(meta), offset) - def __save_existing(self, fileobj, atoms, path, data): + offset = path[-1]._dataoffset + + # ignoring some atom overhead... 
but we don't have padding left anyway + # and padding_size is guaranteed to be less than zero + content_size = get_size(fileobj) - offset + padding_size = -len(meta_data) + assert padding_size < 0 + info = PaddingInfo(padding_size, content_size) + new_padding = info._get_padding(padding_func) + new_padding = min(0xFFFFFFFF, new_padding) + + free = Atom.render(b"free", b"\x00" * new_padding) + meta = Atom.render(b"meta", meta_data + free) + if path[-1].name != b"udta": + # moov.udta not found -- create one + data = Atom.render(b"udta", meta) + else: + data = meta + + insert_bytes(fileobj, len(data), offset) + fileobj.seek(offset) + fileobj.write(data) + self.__update_parents(fileobj, path, len(data)) + self.__update_offsets(fileobj, atoms, len(data), offset) + + def __save_existing(self, fileobj, atoms, path, ilst_data, padding_func): # Replace the old ilst atom. - ilst = path.pop() + ilst = path[-1] offset = ilst.offset length = ilst.length - # Check for padding "free" atoms - meta = path[-1] - index = meta.children.index(ilst) - try: - prev = meta.children[index - 1] - if prev.name == b"free": - offset = prev.offset - length += prev.length - except IndexError: - pass - try: - next = meta.children[index + 1] - if next.name == b"free": - length += next.length - except IndexError: - pass + # Use adjacent free atom if there is one + free = _find_padding(path) + if free is not None: + offset = min(offset, free.offset) + length += free.length - delta = len(data) - length - if delta > 0 or (delta < 0 and delta > -8): - data += self.__pad_ilst(data) - delta = len(data) - length - insert_bytes(fileobj, delta, offset) - elif delta < 0: - data += self.__pad_ilst(data, -delta - 8) - delta = 0 + # Always add a padding atom to make things easier + padding_overhead = len(Atom.render(b"free", b"")) + content_size = get_size(fileobj) - (offset + length) + padding_size = length - (len(ilst_data) + padding_overhead) + info = PaddingInfo(padding_size, content_size) + new_padding = 
info._get_padding(padding_func) + # Limit padding size so we can be sure the free atom overhead is as we + # calculated above (see Atom.render) + new_padding = min(0xFFFFFFFF, new_padding) + + ilst_data += Atom.render(b"free", b"\x00" * new_padding) + + resize_bytes(fileobj, length, len(ilst_data), offset) + delta = len(ilst_data) - length fileobj.seek(offset) - fileobj.write(data) - self.__update_parents(fileobj, path, delta) + fileobj.write(ilst_data) + self.__update_parents(fileobj, path[:-1], delta) self.__update_offsets(fileobj, atoms, delta, offset) def __update_parents(self, fileobj, path, delta): """Update all parent atoms with the new size.""" + + if delta == 0: + return + for atom in path: fileobj.seek(atom.offset) size = cdata.uint_be(fileobj.read(4)) @@ -586,7 +639,11 @@ class MP4Tags(DictProxy, Metadata): def __render_pair(self, key, value): data = [] - for (track, total) in value: + for v in value: + try: + track, total = v + except TypeError: + raise ValueError if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: data.append(struct.pack(">4H", 0, track, total, 0)) else: @@ -621,30 +678,59 @@ class MP4Tags(DictProxy, Metadata): key = _name2key(b"\xa9gen") self.__add(key, values) - def __parse_tempo(self, atom, data): + def __parse_integer(self, atom, data): values = [] for version, flags, data in self.__parse_data(atom, data): - # version = 0, flags = 0 or 21 - if len(data) != 2: - raise MP4MetadataValueError("invalid tempo") - values.append(cdata.ushort_be(data)) + if version != 0: + raise MP4MetadataValueError("unsupported version") + if flags not in (AtomDataType.IMPLICIT, AtomDataType.INTEGER): + raise MP4MetadataValueError("unsupported type") + + if len(data) == 1: + value = cdata.int8(data) + elif len(data) == 2: + value = cdata.int16_be(data) + elif len(data) == 3: + value = cdata.int32_be(data + b"\x00") >> 8 + elif len(data) == 4: + value = cdata.int32_be(data) + elif len(data) == 8: + value = cdata.int64_be(data) + else: + raise 
MP4MetadataValueError( + "invalid value size %d" % len(data)) + values.append(value) + key = _name2key(atom.name) self.__add(key, values) - def __render_tempo(self, key, value): + def __render_integer(self, key, value, min_bytes): + assert min_bytes in (1, 2, 4, 8) + + data_list = [] try: - if len(value) == 0: - return self.__render_data(key, 0, AtomDataType.INTEGER, b"") + for v in value: + # We default to the int size of the usual values written + # by itunes for compatibility. + if cdata.int8_min <= v <= cdata.int8_max and min_bytes <= 1: + data = cdata.to_int8(v) + if cdata.int16_min <= v <= cdata.int16_max and min_bytes <= 2: + data = cdata.to_int16_be(v) + elif cdata.int32_min <= v <= cdata.int32_max and \ + min_bytes <= 4: + data = cdata.to_int32_be(v) + elif cdata.int64_min <= v <= cdata.int64_max and \ + min_bytes <= 8: + data = cdata.to_int64_be(v) + else: + raise MP4MetadataValueError( + "value out of range: %r" % value) + data_list.append(data) - if (min(value) < 0) or (max(value) >= 2 ** 16): - raise MP4MetadataValueError( - "invalid 16 bit integers: %r" % value) - except TypeError: - raise MP4MetadataValueError( - "tmpo must be a list of 16 bit integers") + except (TypeError, ValueError, cdata.error) as e: + raise MP4MetadataValueError(e) - values = [cdata.to_ushort_be(v) for v in value] - return self.__render_data(key, 0, AtomDataType.INTEGER, values) + return self.__render_data(key, 0, AtomDataType.INTEGER, data_list) def __parse_bool(self, atom, data): for version, flags, data in self.__parse_data(atom, data): @@ -710,7 +796,7 @@ class MP4Tags(DictProxy, Metadata): try: text = atom_data.decode("utf-8") except UnicodeDecodeError as e: - raise MP4MetadataError("%s: %s" % (atom.name, e)) + raise MP4MetadataError("%s: %s" % (_name2key(atom.name), e)) values.append(text) @@ -726,7 +812,10 @@ class MP4Tags(DictProxy, Metadata): if not isinstance(v, text_type): if PY3: raise TypeError("%r not str" % v) - v = v.decode("utf-8") + try: + v = 
v.decode("utf-8") + except (AttributeError, UnicodeDecodeError) as e: + raise TypeError(e) encoded.append(v.encode("utf-8")) return self.__render_data(key, 0, flags, encoded) @@ -736,17 +825,31 @@ class MP4Tags(DictProxy, Metadata): self._failed_atoms.clear() self.clear() - self.save(filename) + self.save(filename, padding=lambda x: 0) __atoms = { b"----": (__parse_freeform, __render_freeform), b"trkn": (__parse_pair, __render_pair), b"disk": (__parse_pair, __render_pair_no_trailing), b"gnre": (__parse_genre, None), - b"tmpo": (__parse_tempo, __render_tempo), + b"plID": (__parse_integer, __render_integer, 8), + b"cnID": (__parse_integer, __render_integer, 4), + b"geID": (__parse_integer, __render_integer, 4), + b"atID": (__parse_integer, __render_integer, 4), + b"sfID": (__parse_integer, __render_integer, 4), + b"cmID": (__parse_integer, __render_integer, 4), + b"akID": (__parse_integer, __render_integer, 1), + b"tvsn": (__parse_integer, __render_integer, 4), + b"tves": (__parse_integer, __render_integer, 4), + b"tmpo": (__parse_integer, __render_integer, 2), + b"\xa9mvi": (__parse_integer, __render_integer, 2), + b"\xa9mvc": (__parse_integer, __render_integer, 2), b"cpil": (__parse_bool, __render_bool), b"pgap": (__parse_bool, __render_bool), b"pcst": (__parse_bool, __render_bool), + b"shwm": (__parse_integer, __render_integer, 1), + b"stik": (__parse_integer, __render_integer, 1), + b"rtng": (__parse_integer, __render_integer, 1), b"covr": (__parse_cover, __render_cover), b"purl": (__parse_text, __render_text), b"egid": (__parse_text, __render_text), @@ -761,51 +864,66 @@ class MP4Tags(DictProxy, Metadata): __atoms[name] = (__parse_text, __render_text) def pprint(self): + + def to_line(key, value): + assert isinstance(key, text_type) + if isinstance(value, text_type): + return u"%s=%s" % (key, value) + return u"%s=%r" % (key, value) + values = [] - for key, value in iteritems(self): + for key, value in sorted(iteritems(self)): if not isinstance(key, text_type): 
key = key.decode("latin-1") if key == "covr": - values.append("%s=%s" % (key, ", ".join( - ["[%d bytes of data]" % len(data) for data in value]))) + values.append(u"%s=%s" % (key, u", ".join( + [u"[%d bytes of data]" % len(data) for data in value]))) elif isinstance(value, list): - values.append("%s=%s" % - (key, " / ".join(map(text_type, value)))) + for v in value: + values.append(to_line(key, v)) else: - values.append("%s=%s" % (key, value)) - return "\n".join(values) + values.append(to_line(key, value)) + return u"\n".join(values) class MP4Info(StreamInfo): - """MPEG-4 stream information. + """MP4Info() + + MPEG-4 stream information. Attributes: + bitrate (`int`): bitrate in bits per second, as an int + length (`float`): file length in seconds, as a float + channels (`int`): number of audio channels + sample_rate (`int`): audio sampling rate in Hz + bits_per_sample (`int`): bits per sample + codec (`mutagen.text`): + * if starting with ``"mp4a"`` uses an mp4a audio codec + (see the codec parameter in rfc6381 for details e.g. + ``"mp4a.40.2"``) + * for everything else see a list of possible values at + http://www.mp4ra.org/codecs.html - * bitrate -- bitrate in bits per second, as an int - * length -- file length in seconds, as a float - * channels -- number of audio channels - * sample_rate -- audio sampling rate in Hz - * bits_per_sample -- bits per sample - * codec (string): - * if starting with ``"mp4a"`` uses an mp4a audio codec - (see the codec parameter in rfc6381 for details e.g. ``"mp4a.40.2"``) - * for everything else see a list of possible values at - http://www.mp4ra.org/codecs.html - - e.g. ``"mp4a"``, ``"alac"``, ``"mp4a.40.2"``, ``"ac-3"`` etc. - * codec_description (string): - Name of the codec used (ALAC, AAC LC, AC-3...). Values might change in - the future, use for display purposes only. + e.g. ``"mp4a"``, ``"alac"``, ``"mp4a.40.2"``, ``"ac-3"`` etc. + codec_description (`mutagen.text`): + Name of the codec used (ALAC, AAC LC, AC-3...). 
Values might + change in the future, use for display purposes only. """ bitrate = 0 + length = 0.0 channels = 0 sample_rate = 0 bits_per_sample = 0 codec = u"" - codec_name = u"" + codec_description = u"" - def __init__(self, atoms, fileobj): + def __init__(self, *args, **kwargs): + if args or kwargs: + self.load(*args, **kwargs) + + @convert_error(IOError, MP4StreamInfoError) + def load(self, atoms, fileobj): try: moov = atoms[b"moov"] except KeyError: @@ -819,7 +937,7 @@ class MP4Info(StreamInfo): if data[8:12] == b"soun": break else: - raise MP4StreamInfoError("track has no audio data") + raise MP4NoTrackError("track has no audio data") mdhd = trak[b"mdia", b"mdhd"] ok, data = mdhd.read(fileobj) @@ -907,43 +1025,61 @@ class MP4Info(StreamInfo): class MP4(FileType): - """An MPEG-4 audio file, probably containing AAC. + """MP4(filething) + + An MPEG-4 audio file, probably containing AAC. If more than one track is present in the file, the first is used. Only audio ('soun') tracks will be read. 
- :ivar info: :class:`MP4Info` - :ivar tags: :class:`MP4Tags` + Arguments: + filething (filething) + + Attributes: + info (`MP4Info`) + tags (`MP4Tags`) """ MP4Tags = MP4Tags _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] - def load(self, filename): - self.filename = filename - with open(filename, "rb") as fileobj: - try: - atoms = Atoms(fileobj) - except AtomError as err: - reraise(error, err, sys.exc_info()[2]) + @loadfile() + def load(self, filething): + fileobj = filething.fileobj + try: + atoms = Atoms(fileobj) + except AtomError as err: + reraise(error, err, sys.exc_info()[2]) + + self.info = MP4Info() + try: + self.info.load(atoms, fileobj) + except MP4NoTrackError: + pass + except error: + raise + except Exception as err: + reraise(MP4StreamInfoError, err, sys.exc_info()[2]) + + if not MP4Tags._can_load(atoms): + self.tags = None + self._padding = 0 + else: try: - self.info = MP4Info(atoms, fileobj) + self.tags = self.MP4Tags(atoms, fileobj) except error: raise except Exception as err: - reraise(MP4StreamInfoError, err, sys.exc_info()[2]) - - if not MP4Tags._can_load(atoms): - self.tags = None + reraise(MP4MetadataError, err, sys.exc_info()[2]) else: - try: - self.tags = self.MP4Tags(atoms, fileobj) - except error: - raise - except Exception as err: - reraise(MP4MetadataError, err, sys.exc_info()[2]) + self._padding = self.tags._padding + + def save(self, *args, **kwargs): + """save(filething=None, padding=None)""" + + super(MP4, self).save(*args, **kwargs) def add_tags(self): if self.tags is None: @@ -959,7 +1095,19 @@ class MP4(FileType): Open = MP4 -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - MP4(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. 
+ """ + + t = MP4(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/lib/mutagen/mp4/_as_entry.py b/lib/mutagen/mp4/_as_entry.py old mode 100644 new mode 100755 index 1cc4ff88..15b7e6bc --- a/lib/mutagen/mp4/_as_entry.py +++ b/lib/mutagen/mp4/_as_entry.py @@ -2,12 +2,14 @@ # Copyright (C) 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. from mutagen._compat import cBytesIO, xrange from mutagen.aac import ProgramConfigElement -from mutagen._util import BitReader, BitReaderError, cdata, text_type +from mutagen._util import BitReader, BitReaderError, cdata +from mutagen._compat import text_type from ._util import parse_full_atom from ._atom import Atom, AtomError diff --git a/lib/mutagen/mp4/_atom.py b/lib/mutagen/mp4/_atom.py old mode 100644 new mode 100755 index 7567fa10..cd43a1fe --- a/lib/mutagen/mp4/_atom.py +++ b/lib/mutagen/mp4/_atom.py @@ -1,14 +1,15 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. import struct from mutagen._compat import PY2 +from mutagen._util import convert_error # This is not an exhaustive list of container atoms, but just the # ones this module needs to peek inside. 
@@ -27,6 +28,7 @@ class Atom(object): Attributes: children -- list child atoms (or None for non-container atoms) length -- length of this atom, including length and name + datalength = -- length of this atom without length, name name -- four byte name of the atom, as a str offset -- location in the constructor-given fileobj of this atom @@ -35,6 +37,7 @@ class Atom(object): children = None + @convert_error(IOError, AtomError) def __init__(self, fileobj, level=0): """May raise AtomError""" @@ -74,13 +77,16 @@ class Atom(object): else: fileobj.seek(self.offset + self.length, 0) + @property + def datalength(self): + return self.length - (self._dataoffset - self.offset) + def read(self, fileobj): """Return if all data could be read and the atom payload""" fileobj.seek(self._dataoffset, 0) - length = self.length - (self._dataoffset - self.offset) - data = fileobj.read(length) - return len(data) == length, data + data = fileobj.read(self.datalength) + return len(data) == self.datalength, data @staticmethod def render(name, data): @@ -138,6 +144,7 @@ class Atoms(object): This structure should only be used internally by Mutagen. """ + @convert_error(IOError, AtomError) def __init__(self, fileobj): self.atoms = [] fileobj.seek(0, 2) @@ -184,7 +191,7 @@ class Atoms(object): if child.name == names[0]: return child[names[1:]] else: - raise KeyError("%s not found" % names[0]) + raise KeyError("%r not found" % names[0]) def __repr__(self): return "\n".join([repr(child) for child in self.atoms]) diff --git a/lib/mutagen/mp4/_util.py b/lib/mutagen/mp4/_util.py old mode 100644 new mode 100755 index 9583334a..43d81c82 --- a/lib/mutagen/mp4/_util.py +++ b/lib/mutagen/mp4/_util.py @@ -2,8 +2,9 @@ # Copyright (C) 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
+# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. from mutagen._util import cdata diff --git a/lib/mutagen/musepack.py b/lib/mutagen/musepack.py old mode 100644 new mode 100755 index 0dc940f1..f4d210d1 --- a/lib/mutagen/musepack.py +++ b/lib/mutagen/musepack.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Lukas Lalinsky # Copyright (C) 2012 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Musepack audio streams with APEv2 tags. @@ -22,8 +22,8 @@ import struct from ._compat import endswith, xrange from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete -from mutagen.id3 import BitPaddedInt -from mutagen._util import cdata +from mutagen.id3._util import BitPaddedInt +from mutagen._util import cdata, convert_error class MusepackHeaderError(error): @@ -67,20 +67,24 @@ def _calc_sv8_peak(peak): class MusepackInfo(StreamInfo): - """Musepack stream information. + """MusepackInfo() + + Musepack stream information. 
Attributes: - - * channels -- number of audio channels - * length -- file length in seconds, as a float - * sample_rate -- audio sampling rate in Hz - * bitrate -- audio bitrate, in bits per second - * version -- Musepack stream version + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bitrate (`int`): audio bitrate, in bits per second + version (`int`) Musepack stream version Optional Attributes: - * title_gain, title_peak -- Replay Gain and peak data for this song - * album_gain, album_peak -- Replay Gain and peak data for this album + Attributes: + title_gain (`float`): Replay Gain for this song + title_peak (`float`): Peak data for this song + album_gain (`float`): Replay Gain for this album + album_peak (`float`): Peak data for this album These attributes are only available in stream version 7/8. The gains are a float, +/- some dB. The peaks are a percentage [0..1] of @@ -88,7 +92,10 @@ class MusepackInfo(StreamInfo): VorbisGain, you must multiply the peak by 2. 
""" + @convert_error(IOError, MusepackHeaderError) def __init__(self, fileobj): + """Raises MusepackHeaderError""" + header = fileobj.read(4) if len(header) != 4: raise MusepackHeaderError("not a Musepack file") @@ -161,7 +168,7 @@ class MusepackInfo(StreamInfo): try: self.version = bytearray(fileobj.read(1))[0] - except TypeError: + except (TypeError, IndexError): raise MusepackHeaderError("SH packet ended unexpectedly.") remaining_size -= 1 @@ -246,16 +253,25 @@ class MusepackInfo(StreamInfo): def pprint(self): rg_data = [] if hasattr(self, "title_gain"): - rg_data.append("%+0.2f (title)" % self.title_gain) + rg_data.append(u"%+0.2f (title)" % self.title_gain) if hasattr(self, "album_gain"): - rg_data.append("%+0.2f (album)" % self.album_gain) + rg_data.append(u"%+0.2f (album)" % self.album_gain) rg_data = (rg_data and ", Gain: " + ", ".join(rg_data)) or "" - return "Musepack SV%d, %.2f seconds, %d Hz, %d bps%s" % ( + return u"Musepack SV%d, %.2f seconds, %d Hz, %d bps%s" % ( self.version, self.length, self.sample_rate, self.bitrate, rg_data) class Musepack(APEv2File): + """Musepack(filething) + + Arguments: + filething (filething) + + Attributes: + info (`MusepackInfo`) + """ + _Info = MusepackInfo _mimes = ["audio/x-musepack", "audio/x-mpc"] diff --git a/lib/mutagen/ogg.py b/lib/mutagen/ogg.py old mode 100644 new mode 100755 index 99eaf422..8376e3ac --- a/lib/mutagen/ogg.py +++ b/lib/mutagen/ogg.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg bitstreams and pages. 
@@ -21,11 +21,11 @@ import sys import zlib from mutagen import FileType -from mutagen._util import cdata, insert_bytes, delete_bytes, MutagenError -from ._compat import cBytesIO, reraise, chr_ +from mutagen._util import cdata, resize_bytes, MutagenError, loadfile, seek_end +from ._compat import cBytesIO, reraise, chr_, izip, xrange -class error(IOError, MutagenError): +class error(MutagenError): """Ogg stream parsing errors.""" pass @@ -42,14 +42,15 @@ class OggPage(object): to the start of the next page. Attributes: - - * version -- stream structure version (currently always 0) - * position -- absolute stream position (default -1) - * serial -- logical stream serial number (default 0) - * sequence -- page sequence number within logical stream (default 0) - * offset -- offset this page was read from (default None) - * complete -- if the last packet on this page is complete (default True) - * packets -- list of raw packet data (default []) + version (`int`): stream structure version (currently always 0) + position (`int`): absolute stream position (default -1) + serial (`int`): logical stream serial number (default 0) + sequence (`int`): page sequence number within logical stream + (default 0) + offset (`int` or `None`): offset this page was read from (default None) + complete (`bool`): if the last packet on this page is complete + (default True) + packets (List[`bytes`]): list of raw packet data (default []) Note that if 'complete' is false, the next page's 'continued' property must be true (so set both when constructing pages). 
@@ -67,6 +68,8 @@ class OggPage(object): complete = True def __init__(self, fileobj=None): + """Raises error, IOError, EOFError""" + self.packets = [] if fileobj is None: @@ -272,6 +275,39 @@ class OggPage(object): return [b"".join(p) for p in packets] + @classmethod + def _from_packets_try_preserve(cls, packets, old_pages): + """Like from_packets but in case the size and number of the packets + is the same as in the given pages the layout of the pages will + be copied (the page size and number will match). + + If the packets don't match this behaves like:: + + OggPage.from_packets(packets, sequence=old_pages[0].sequence) + """ + + old_packets = cls.to_packets(old_pages) + + if [len(p) for p in packets] != [len(p) for p in old_packets]: + # doesn't match, fall back + return cls.from_packets(packets, old_pages[0].sequence) + + new_data = b"".join(packets) + new_pages = [] + for old in old_pages: + new = OggPage() + new.sequence = old.sequence + new.complete = old.complete + new.continued = old.continued + new.position = old.position + for p in old.packets: + data, new_data = new_data[:len(p)], new_data[len(p):] + new.packets.append(data) + new_pages.append(new) + assert not new_data + + return new_pages + @staticmethod def from_packets(packets, sequence=0, default_size=4096, wiggle_room=2048): @@ -346,9 +382,13 @@ class OggPage(object): such, it must be opened r+b or w+b. """ + if not len(old_pages) or not len(new_pages): + raise ValueError("empty pages list not allowed") + # Number the new pages starting from the first old page. 
first = old_pages[0].sequence - for page, seq in zip(new_pages, range(first, first + len(new_pages))): + for page, seq in izip(new_pages, + xrange(first, first + len(new_pages))): page.sequence = seq page.serial = old_pages[0].serial @@ -362,24 +402,28 @@ class OggPage(object): if not new_pages[-1].complete and len(new_pages[-1].packets) == 1: new_pages[-1].position = -1 - new_data = b"".join(cls.write(p) for p in new_pages) + new_data = [cls.write(p) for p in new_pages] - # Make room in the file for the new data. - delta = len(new_data) - fileobj.seek(old_pages[0].offset, 0) - insert_bytes(fileobj, delta, old_pages[0].offset) - fileobj.seek(old_pages[0].offset, 0) - fileobj.write(new_data) - new_data_end = old_pages[0].offset + delta + # Add dummy data or merge the remaining data together so multiple + # new pages replace an old one + pages_diff = len(old_pages) - len(new_data) + if pages_diff > 0: + new_data.extend([b""] * pages_diff) + elif pages_diff < 0: + new_data[pages_diff - 1:] = [b"".join(new_data[pages_diff - 1:])] - # Go through the old pages and delete them. Since we shifted - # the data down the file, we need to adjust their offsets. We - # also need to go backwards, so we don't adjust the deltas of - # the other pages. - old_pages.reverse() - for old_page in old_pages: - adj_offset = old_page.offset + delta - delete_bytes(fileobj, old_page.size, adj_offset) + # Replace pages one by one. If the sizes match no resize happens. + offset_adjust = 0 + new_data_end = None + assert len(old_pages) == len(new_data) + for old_page, data in izip(old_pages, new_data): + offset = old_page.offset + offset_adjust + data_size = len(data) + resize_bytes(fileobj, old_page.size, data_size, offset) + fileobj.seek(offset, 0) + fileobj.write(data) + new_data_end = offset + data_size + offset_adjust += (data_size - old_page.size) # Finally, if there's any discrepency in length, we need to # renumber the pages for the logical stream. 
@@ -398,14 +442,15 @@ class OggPage(object): This finds the last page in the actual file object, or the last page in the stream (with eos set), whichever comes first. + + Returns None in case no page with the serial exists. + Raises error in case this isn't a valid ogg stream. + Raises IOError. """ # For non-muxed streams, look at the last page. - try: - fileobj.seek(-256 * 256, 2) - except IOError: - # The file is less than 64k in length. - fileobj.seek(0) + seek_end(fileobj, 256 * 256) + data = fileobj.read() try: index = data.rindex(b"OggS") @@ -443,66 +488,92 @@ class OggPage(object): class OggFileType(FileType): - """An generic Ogg file.""" + """OggFileType(filething) + + An generic Ogg file. + + Arguments: + filething (filething) + """ _Info = None _Tags = None _Error = None _mimes = ["application/ogg", "application/x-ogg"] - def load(self, filename): - """Load file information from a filename.""" + @loadfile() + def load(self, filething): + """load(filething) - self.filename = filename - fileobj = open(filename, "rb") - try: - try: - self.info = self._Info(fileobj) - self.tags = self._Tags(fileobj, self.info) - self.info._post_tags(fileobj) - except error as e: - reraise(self._Error, e, sys.exc_info()[2]) - except EOFError: - raise self._Error("no appropriate stream found") - finally: - fileobj.close() + Load file information from a filename. - def delete(self, filename=None): - """Remove tags from a file. - - If no filename is given, the one most recently loaded is used. 
+ Args: + filething (filething) + Raises: + mutagen.MutagenError """ - if filename is None: - filename = self.filename + fileobj = filething.fileobj + + try: + self.info = self._Info(fileobj) + self.tags = self._Tags(fileobj, self.info) + self.info._post_tags(fileobj) + except (error, IOError) as e: + reraise(self._Error, e, sys.exc_info()[2]) + except EOFError: + raise self._Error("no appropriate stream found") + + @loadfile(writable=True) + def delete(self, filething): + """delete(filething=None) + + Remove tags from a file. + + If no filename is given, the one most recently loaded is used. + + Args: + filething (filething) + Raises: + mutagen.MutagenError + """ + + fileobj = filething.fileobj self.tags.clear() - fileobj = open(filename, "rb+") + # TODO: we should delegate the deletion to the subclass and not through + # _inject. try: try: - self.tags._inject(fileobj) + self.tags._inject(fileobj, lambda x: 0) except error as e: reraise(self._Error, e, sys.exc_info()[2]) except EOFError: raise self._Error("no appropriate stream found") - finally: - fileobj.close() + except IOError as e: + reraise(self._Error, e, sys.exc_info()[2]) - def save(self, filename=None): - """Save a tag to a file. + def add_tags(self): + raise self._Error + + @loadfile(writable=True) + def save(self, filething, padding=None): + """save(filething=None, padding=None) + + Save a tag to a file. If no filename is given, the one most recently loaded is used. 
+ + Args: + filething (filething) + padding (PaddingFunction) + Raises: + mutagen.MutagenError """ - if filename is None: - filename = self.filename - fileobj = open(filename, "rb+") try: - try: - self.tags._inject(fileobj) - except error as e: - reraise(self._Error, e, sys.exc_info()[2]) - except EOFError: - raise self._Error("no appropriate stream found") - finally: - fileobj.close() + self.tags._inject(filething.fileobj, padding) + except (IOError, error) as e: + reraise(self._Error, e, sys.exc_info()[2]) + except EOFError: + raise self._Error("no appropriate stream found") diff --git a/lib/mutagen/oggflac.py b/lib/mutagen/oggflac.py old mode 100644 new mode 100755 index 507a7f55..3dc3fe2e --- a/lib/mutagen/oggflac.py +++ b/lib/mutagen/oggflac.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg FLAC comments. @@ -21,7 +21,10 @@ import struct from ._compat import cBytesIO -from mutagen.flac import StreamInfo, VCFLACDict, StrictFileObject +from mutagen import StreamInfo +from mutagen.flac import StreamInfo as FLACStreamInfo, error as FLACError +from mutagen._vorbis import VCommentDict +from mutagen._util import loadfile, convert_error from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -34,28 +37,24 @@ class OggFLACHeaderError(error): class OggFLACStreamInfo(StreamInfo): - """Ogg FLAC general header and stream info. + """OggFLACStreamInfo() - This encompasses the Ogg wrapper for the FLAC STREAMINFO metadata - block, as well as the Ogg codec setup that precedes it. + Ogg FLAC stream info. 
- Attributes (in addition to StreamInfo's): - - * packets -- number of metadata packets - * serial -- Ogg logical stream serial number + Attributes: + length (`float`): File length in seconds, as a float + channels (`float`): Number of channels + sample_rate (`int`): Sample rate in Hz" """ - packets = 0 - serial = 0 + length = 0 + channels = 0 + sample_rate = 0 - def load(self, data): - # Ogg expects file objects that don't raise on read - if isinstance(data, StrictFileObject): - data = data._fileobj - - page = OggPage(data) + def __init__(self, fileobj): + page = OggPage(fileobj) while not page.packets[0].startswith(b"\x7FFLAC"): - page = OggPage(data) + page = OggPage(fileobj) major, minor, self.packets, flac = struct.unpack( ">BBH4s", page.packets[0][5:13]) if flac != b"fLaC": @@ -66,8 +65,16 @@ class OggFLACStreamInfo(StreamInfo): self.serial = page.serial # Skip over the block header. - stringobj = StrictFileObject(cBytesIO(page.packets[0][17:])) - super(OggFLACStreamInfo, self).load(stringobj) + stringobj = cBytesIO(page.packets[0][17:]) + + try: + flac_info = FLACStreamInfo(stringobj) + except FLACError as e: + raise OggFLACHeaderError(e) + + for attr in ["min_blocksize", "max_blocksize", "sample_rate", + "channels", "bits_per_sample", "total_samples", "length"]: + setattr(self, attr, getattr(flac_info, attr)) def _post_tags(self, fileobj): if self.length: @@ -76,24 +83,26 @@ class OggFLACStreamInfo(StreamInfo): self.length = page.position / float(self.sample_rate) def pprint(self): - return u"Ogg " + super(OggFLACStreamInfo, self).pprint() + return u"Ogg FLAC, %.2f seconds, %d Hz" % ( + self.length, self.sample_rate) -class OggFLACVComment(VCFLACDict): - def load(self, data, info, errors='replace'): +class OggFLACVComment(VCommentDict): + + def __init__(self, fileobj, info): # data should be pointing at the start of an Ogg page, after # the first FLAC page. 
pages = [] complete = False while not complete: - page = OggPage(data) + page = OggPage(fileobj) if page.serial == info.serial: pages.append(page) complete = page.complete or (len(page.packets) > 1) comment = cBytesIO(OggPage.to_packets(pages)[0][4:]) - super(OggFLACVComment, self).load(comment, errors=errors) + super(OggFLACVComment, self).__init__(comment, framing=False) - def _inject(self, fileobj): + def _inject(self, fileobj, padding_func): """Write tag data into the FLAC Vorbis comment packet/page.""" # Ogg FLAC has no convenient data marker like Vorbis, but the @@ -116,7 +125,7 @@ class OggFLACVComment(VCFLACDict): packets = OggPage.to_packets(old_pages, strict=False) # Set the new comment block. - data = self.write() + data = self.write(framing=False) data = packets[0][:1] + struct.pack(">I", len(data))[-3:] + data packets[0] = data @@ -125,13 +134,26 @@ class OggFLACVComment(VCFLACDict): class OggFLAC(OggFileType): - """An Ogg FLAC file.""" + """OggFLAC(filething) + + An Ogg FLAC file. + + Arguments: + filething (filething) + + Attributes: + info (`OggFLACStreamInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggFLACStreamInfo _Tags = OggFLACVComment _Error = OggFLACHeaderError _mimes = ["audio/x-oggflac"] + info = None + tags = None + @staticmethod def score(filename, fileobj, header): return (header.startswith(b"OggS") * ( @@ -141,7 +163,19 @@ class OggFLAC(OggFileType): Open = OggFLAC -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggFLAC(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. 
+ """ + + t = OggFLAC(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/lib/mutagen/oggopus.py b/lib/mutagen/oggopus.py old mode 100644 new mode 100755 index 091dcf43..758fcf9a --- a/lib/mutagen/oggopus.py +++ b/lib/mutagen/oggopus.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2012, 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg Opus comments. @@ -20,6 +20,8 @@ import struct from mutagen import StreamInfo from mutagen._compat import BytesIO +from mutagen._util import get_size, loadfile, convert_error +from mutagen._tags import PaddingInfo from mutagen._vorbis import VCommentDict from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -33,15 +35,17 @@ class OggOpusHeaderError(error): class OggOpusInfo(StreamInfo): - """Ogg Opus stream information. + """OggOpusInfo() + + Ogg Opus stream information. 
Attributes: - - * length - file length in seconds, as a float - * channels - number of channels + length (`float`): File length in seconds, as a float + channels (`int`): Number of channels """ length = 0 + channels = 0 def __init__(self, fileobj): page = OggPage(fileobj) @@ -66,6 +70,8 @@ class OggOpusInfo(StreamInfo): def _post_tags(self, fileobj): page = OggPage.find_last(fileobj, self.serial) + if page is None: + raise OggOpusHeaderError self.length = (page.position - self.__pre_skip) / float(48000) def pprint(self): @@ -96,34 +102,62 @@ class OggOpusVComment(VCommentDict): data = OggPage.to_packets(pages)[0][8:] # Strip OpusTags fileobj = BytesIO(data) super(OggOpusVComment, self).__init__(fileobj, framing=False) + self._padding = len(data) - self._size # in case the LSB of the first byte after v-comment is 1, preserve the # following data padding_flag = fileobj.read(1) if padding_flag and ord(padding_flag) & 0x1: self._pad_data = padding_flag + fileobj.read() + self._padding = 0 # we have to preserve, so no padding else: self._pad_data = b"" - def _inject(self, fileobj): + def _inject(self, fileobj, padding_func): fileobj.seek(0) info = OggOpusInfo(fileobj) old_pages = self.__get_comment_pages(fileobj, info) packets = OggPage.to_packets(old_pages) - packets[0] = b"OpusTags" + self.write(framing=False) + self._pad_data - new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + vcomment_data = b"OpusTags" + self.write(framing=False) + + if self._pad_data: + # if we have padding data to preserver we can't add more padding + # as long as we don't know the structure of what follows + packets[0] = vcomment_data + self._pad_data + else: + content_size = get_size(fileobj) - len(packets[0]) # approx + padding_left = len(packets[0]) - len(vcomment_data) + info = PaddingInfo(padding_left, content_size) + new_padding = info._get_padding(padding_func) + packets[0] = vcomment_data + b"\x00" * new_padding + + new_pages = OggPage._from_packets_try_preserve(packets, 
old_pages) OggPage.replace(fileobj, old_pages, new_pages) class OggOpus(OggFileType): - """An Ogg Opus file.""" + """OggOpus(filething) + + An Ogg Opus file. + + Arguments: + filething (filething) + + Attributes: + info (`OggOpusInfo`) + tags (`mutagen._vorbis.VCommentDict`) + + """ _Info = OggOpusInfo _Tags = OggOpusVComment _Error = OggOpusHeaderError _mimes = ["audio/ogg", "audio/ogg; codecs=opus"] + info = None + tags = None + @staticmethod def score(filename, fileobj, header): return (header.startswith(b"OggS") * (b"OpusHead" in header)) @@ -132,7 +166,19 @@ class OggOpus(OggFileType): Open = OggOpus -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggOpus(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ + + t = OggOpus(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/lib/mutagen/oggspeex.py b/lib/mutagen/oggspeex.py old mode 100644 new mode 100755 index 0590fa6b..b3c1a17b --- a/lib/mutagen/oggspeex.py +++ b/lib/mutagen/oggspeex.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg Speex comments. 
@@ -22,7 +22,8 @@ __all__ = ["OggSpeex", "Open", "delete"] from mutagen import StreamInfo from mutagen._vorbis import VCommentDict from mutagen.ogg import OggPage, OggFileType, error as OggError -from mutagen._util import cdata +from mutagen._util import cdata, get_size, loadfile, convert_error +from mutagen._tags import PaddingInfo class error(OggError): @@ -34,19 +35,21 @@ class OggSpeexHeaderError(error): class OggSpeexInfo(StreamInfo): - """Ogg Speex stream information. + """OggSpeexInfo() + + Ogg Speex stream information. Attributes: - - * bitrate - nominal bitrate in bits per second - * channels - number of channels - * length - file length in seconds, as a float - - The reference encoder does not set the bitrate; in this case, - the bitrate will be 0. + length (`float`): file length in seconds, as a float + channels (`int`): number of channels + bitrate (`int`): nominal bitrate in bits per second. The reference + encoder does not set the bitrate; in this case, the bitrate will + be 0. """ length = 0 + channels = 0 + bitrate = 0 def __init__(self, fileobj): page = OggPage(fileobj) @@ -62,6 +65,8 @@ class OggSpeexInfo(StreamInfo): def _post_tags(self, fileobj): page = OggPage.find_last(fileobj, self.serial) + if page is None: + raise OggSpeexHeaderError self.length = page.position / float(self.sample_rate) def pprint(self): @@ -79,10 +84,11 @@ class OggSpeexVComment(VCommentDict): if page.serial == info.serial: pages.append(page) complete = page.complete or (len(page.packets) > 1) - data = OggPage.to_packets(pages)[0] + b"\x01" + data = OggPage.to_packets(pages)[0] super(OggSpeexVComment, self).__init__(data, framing=False) + self._padding = len(data) - self._size - def _inject(self, fileobj): + def _inject(self, fileobj, padding_func): """Write tag data into the Speex comment packet/page.""" fileobj.seek(0) @@ -109,21 +115,41 @@ class OggSpeexVComment(VCommentDict): packets = OggPage.to_packets(old_pages, strict=False) - # Set the new comment packet. 
- packets[0] = self.write(framing=False) + content_size = get_size(fileobj) - len(packets[0]) # approx + vcomment_data = self.write(framing=False) + padding_left = len(packets[0]) - len(vcomment_data) - new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + info = PaddingInfo(padding_left, content_size) + new_padding = info._get_padding(padding_func) + + # Set the new comment packet. + packets[0] = vcomment_data + b"\x00" * new_padding + + new_pages = OggPage._from_packets_try_preserve(packets, old_pages) OggPage.replace(fileobj, old_pages, new_pages) class OggSpeex(OggFileType): - """An Ogg Speex file.""" + """OggSpeex(filething) + + An Ogg Speex file. + + Arguments: + filething (filething) + + Attributes: + info (`OggSpeexInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggSpeexInfo _Tags = OggSpeexVComment _Error = OggSpeexHeaderError _mimes = ["audio/x-speex"] + info = None + tags = None + @staticmethod def score(filename, fileobj, header): return (header.startswith(b"OggS") * (b"Speex " in header)) @@ -132,7 +158,19 @@ class OggSpeex(OggFileType): Open = OggSpeex -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggSpeex(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ + + t = OggSpeex(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/lib/mutagen/oggtheora.py b/lib/mutagen/oggtheora.py old mode 100644 new mode 100755 index 0542823c..85512d08 --- a/lib/mutagen/oggtheora.py +++ b/lib/mutagen/oggtheora.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
+# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg Theora comments. @@ -20,7 +20,8 @@ import struct from mutagen import StreamInfo from mutagen._vorbis import VCommentDict -from mutagen._util import cdata +from mutagen._util import cdata, get_size, loadfile, convert_error +from mutagen._tags import PaddingInfo from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -33,15 +34,19 @@ class OggTheoraHeaderError(error): class OggTheoraInfo(StreamInfo): - """Ogg Theora stream information. + """OggTheoraInfo() + + Ogg Theora stream information. Attributes: - - * length - file length in seconds, as a float - * fps - video frames per second, as a float + length (`float`): File length in seconds, as a float + fps (`float`): Video frames per second, as a float + bitrate (`int`): Bitrate in bps (int) """ length = 0 + fps = 0 + bitrate = 0 def __init__(self, fileobj): page = OggPage(fileobj) @@ -63,13 +68,16 @@ class OggTheoraInfo(StreamInfo): def _post_tags(self, fileobj): page = OggPage.find_last(fileobj, self.serial) + if page is None: + raise OggTheoraHeaderError position = page.position mask = (1 << self.granule_shift) - 1 frames = (position >> self.granule_shift) + (position & mask) self.length = frames / float(self.fps) def pprint(self): - return "Ogg Theora, %.2f seconds, %d bps" % (self.length, self.bitrate) + return u"Ogg Theora, %.2f seconds, %d bps" % (self.length, + self.bitrate) class OggTheoraCommentDict(VCommentDict): @@ -84,9 +92,10 @@ class OggTheoraCommentDict(VCommentDict): pages.append(page) complete = page.complete or (len(page.packets) > 1) data = OggPage.to_packets(pages)[0][7:] - super(OggTheoraCommentDict, self).__init__(data + b"\x01") + super(OggTheoraCommentDict, self).__init__(data, framing=False) + self._padding = len(data) - self._size - def _inject(self, fileobj): + def _inject(self, 
fileobj, padding_func): """Write tag data into the Theora comment packet/page.""" fileobj.seek(0) @@ -102,20 +111,40 @@ class OggTheoraCommentDict(VCommentDict): packets = OggPage.to_packets(old_pages, strict=False) - packets[0] = b"\x81theora" + self.write(framing=False) + content_size = get_size(fileobj) - len(packets[0]) # approx + vcomment_data = b"\x81theora" + self.write(framing=False) + padding_left = len(packets[0]) - len(vcomment_data) - new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + info = PaddingInfo(padding_left, content_size) + new_padding = info._get_padding(padding_func) + + packets[0] = vcomment_data + b"\x00" * new_padding + + new_pages = OggPage._from_packets_try_preserve(packets, old_pages) OggPage.replace(fileobj, old_pages, new_pages) class OggTheora(OggFileType): - """An Ogg Theora file.""" + """OggTheora(filething) + + An Ogg Theora file. + + Arguments: + filething (filething) + + Attributes: + info (`OggTheoraInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggTheoraInfo _Tags = OggTheoraCommentDict _Error = OggTheoraHeaderError _mimes = ["video/x-theora"] + info = None + tags = None + @staticmethod def score(filename, fileobj, header): return (header.startswith(b"OggS") * @@ -125,7 +154,19 @@ class OggTheora(OggFileType): Open = OggTheora -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggTheora(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. 
+ """ + + t = OggTheora(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/lib/mutagen/oggvorbis.py b/lib/mutagen/oggvorbis.py old mode 100644 new mode 100755 index fda58c4f..9c1d8fb0 --- a/lib/mutagen/oggvorbis.py +++ b/lib/mutagen/oggvorbis.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """Read and write Ogg Vorbis comments. @@ -21,6 +21,8 @@ import struct from mutagen import StreamInfo from mutagen._vorbis import VCommentDict +from mutagen._util import get_size, loadfile, convert_error +from mutagen._tags import PaddingInfo from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -33,17 +35,26 @@ class OggVorbisHeaderError(error): class OggVorbisInfo(StreamInfo): - """Ogg Vorbis stream information. + """OggVorbisInfo() + + Ogg Vorbis stream information. 
Attributes: + length (`float`): File length in seconds, as a float + channels (`int`): Number of channels + bitrate (`int`): Nominal ('average') bitrate in bits per second + sample_Rate (`int`): Sample rate in Hz - * length - file length in seconds, as a float - * bitrate - nominal ('average') bitrate in bits per second, as an int """ - length = 0 + length = 0.0 + channels = 0 + bitrate = 0 + sample_rate = 0 def __init__(self, fileobj): + """Raises ogg.error, IOError""" + page = OggPage(fileobj) while not page.packets[0].startswith(b"\x01vorbis"): page = OggPage(fileobj) @@ -70,7 +81,11 @@ class OggVorbisInfo(StreamInfo): self.bitrate = nominal_bitrate def _post_tags(self, fileobj): + """Raises ogg.error""" + page = OggPage.find_last(fileobj, self.serial) + if page is None: + raise OggVorbisHeaderError self.length = page.position / float(self.sample_rate) def pprint(self): @@ -91,8 +106,9 @@ class OggVCommentDict(VCommentDict): complete = page.complete or (len(page.packets) > 1) data = OggPage.to_packets(pages)[0][7:] # Strip off "\x03vorbis". super(OggVCommentDict, self).__init__(data) + self._padding = len(data) - self._size - def _inject(self, fileobj): + def _inject(self, fileobj, padding_func): """Write tag data into the Vorbis comment packet/page.""" # Find the old pages in the file; we'll need to remove them, @@ -110,21 +126,41 @@ class OggVCommentDict(VCommentDict): packets = OggPage.to_packets(old_pages, strict=False) - # Set the new comment packet. - packets[0] = b"\x03vorbis" + self.write() + content_size = get_size(fileobj) - len(packets[0]) # approx + vcomment_data = b"\x03vorbis" + self.write() + padding_left = len(packets[0]) - len(vcomment_data) - new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + info = PaddingInfo(padding_left, content_size) + new_padding = info._get_padding(padding_func) + + # Set the new comment packet. 
+ packets[0] = vcomment_data + b"\x00" * new_padding + + new_pages = OggPage._from_packets_try_preserve(packets, old_pages) OggPage.replace(fileobj, old_pages, new_pages) class OggVorbis(OggFileType): - """An Ogg Vorbis file.""" + """OggVorbis(filething) + + Arguments: + filething (filething) + + An Ogg Vorbis file. + + Attributes: + info (`OggVorbisInfo`) + tags (`mutagen._vorbis.VCommentDict`) + """ _Info = OggVorbisInfo _Tags = OggVCommentDict _Error = OggVorbisHeaderError _mimes = ["audio/vorbis", "audio/x-vorbis"] + info = None + tags = None + @staticmethod def score(filename, fileobj, header): return (header.startswith(b"OggS") * (b"\x01vorbis" in header)) @@ -133,7 +169,19 @@ class OggVorbis(OggFileType): Open = OggVorbis -def delete(filename): - """Remove tags from a file.""" +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """ delete(filething) - OggVorbis(filename).delete() + Arguments: + filething (filething) + Raises: + mutagen.MutagenError + + Remove tags from a file. + """ + + t = OggVorbis(filething) + filething.fileobj.seek(0) + t.delete(filething) diff --git a/lib/mutagen/optimfrog.py b/lib/mutagen/optimfrog.py old mode 100644 new mode 100755 index 3b6a70d8..830224d6 --- a/lib/mutagen/optimfrog.py +++ b/lib/mutagen/optimfrog.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """OptimFROG audio streams with APEv2 tags. 
@@ -23,6 +23,7 @@ __all__ = ["OptimFROG", "Open", "delete"] import struct from ._compat import endswith +from ._util import convert_error from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete @@ -32,16 +33,20 @@ class OptimFROGHeaderError(error): class OptimFROGInfo(StreamInfo): - """OptimFROG stream information. + """OptimFROGInfo() + + OptimFROG stream information. Attributes: - - * channels - number of audio channels - * length - file length in seconds, as a float - * sample_rate - audio sampling rate in Hz + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz """ + @convert_error(IOError, OptimFROGHeaderError) def __init__(self, fileobj): + """Raises OptimFROGHeaderError""" + header = fileobj.read(76) if (len(header) != 76 or not header.startswith(b"OFR ") or struct.unpack("I", b"\x00" + bytes(data))[0] + tempos.append((deltasum, TEMPO, tempo)) + off += num + elif event_type in (0xF0, 0xF7): + val, off = _var_int(chunk, off) + off += val + else: + if event_type < 0x80: + # if < 0x80 take the type from the previous midi event + off += 1 + event_type = status + elif event_type < 0xF0: + off += 2 + status = event_type + else: + raise SMFError("invalid event") + + if event_type >> 4 in (0xD, 0xC): + off -= 1 + + events.append((deltasum, MIDI, delta)) + + return events, tempos + + +def _read_midi_length(fileobj): + """Returns the duration in seconds. 
Can raise all kind of errors...""" + + TEMPO, MIDI = range(2) + + def read_chunk(fileobj): + info = fileobj.read(8) + if len(info) != 8: + raise SMFError("truncated") + chunklen = struct.unpack(">I", info[4:])[0] + data = fileobj.read(chunklen) + if len(data) != chunklen: + raise SMFError("truncated") + return info[:4], data + + identifier, chunk = read_chunk(fileobj) + if identifier != b"MThd": + raise SMFError("Not a MIDI file") + + if len(chunk) != 6: + raise SMFError("truncated") + + format_, ntracks, tickdiv = struct.unpack(">HHH", chunk) + if format_ > 1: + raise SMFError("Not supported format %d" % format_) + + if tickdiv >> 15: + # fps = (-(tickdiv >> 8)) & 0xFF + # subres = tickdiv & 0xFF + # never saw one of those + raise SMFError("Not supported timing interval") + + # get a list of events and tempo changes for each track + tracks = [] + first_tempos = None + for tracknum in xrange(ntracks): + identifier, chunk = read_chunk(fileobj) + if identifier != b"MTrk": + continue + events, tempos = _read_track(chunk) + + # In case of format == 1, copy the first tempo list to all tracks + first_tempos = first_tempos or tempos + if format_ == 1: + tempos = list(first_tempos) + events += tempos + events.sort() + tracks.append(events) + + # calculate the duration of each track + durations = [] + for events in tracks: + tempo = 500000 + parts = [] + deltasum = 0 + for (dummy, type_, data) in events: + if type_ == TEMPO: + parts.append((deltasum, tempo)) + tempo = data + deltasum = 0 + else: + deltasum += data + parts.append((deltasum, tempo)) + + duration = 0 + for (deltasum, tempo) in parts: + quarter, tpq = deltasum / float(tickdiv), tempo + duration += (quarter * tpq) + duration /= 10 ** 6 + + durations.append(duration) + + # return the longest one + return max(durations) + + +class SMFInfo(StreamInfo): + """SMFInfo() + + Attributes: + length (`float`): Length in seconds + + """ + + def __init__(self, fileobj): + """Raises SMFError""" + + self.length = 
_read_midi_length(fileobj) + + def pprint(self): + return u"SMF, %.2f seconds" % self.length + + +class SMF(FileType): + """SMF(filething) + + Standard MIDI File (SMF) + + Attributes: + info (`SMFInfo`) + tags: `None` + """ + + _mimes = ["audio/midi", "audio/x-midi"] + + @loadfile() + def load(self, filething): + try: + self.info = SMFInfo(filething.fileobj) + except IOError as e: + raise SMFError(e) + + def add_tags(self): + raise SMFError("doesn't support tags") + + @staticmethod + def score(filename, fileobj, header): + filename = filename.lower() + return header.startswith(b"MThd") and ( + endswith(filename, ".mid") or endswith(filename, ".midi")) + + +Open = SMF +error = SMFError + +__all__ = ["SMF"] diff --git a/lib/mutagen/trueaudio.py b/lib/mutagen/trueaudio.py old mode 100644 new mode 100755 index f268fe60..882e3a62 --- a/lib/mutagen/trueaudio.py +++ b/lib/mutagen/trueaudio.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- - # Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """True Audio audio stream information and tags. @@ -20,27 +20,31 @@ __all__ = ["TrueAudio", "Open", "delete", "EasyTrueAudio"] from ._compat import endswith from mutagen import StreamInfo from mutagen.id3 import ID3FileType, delete -from mutagen._util import cdata, MutagenError +from mutagen._util import cdata, MutagenError, convert_error -class error(RuntimeError, MutagenError): +class error(MutagenError): pass -class TrueAudioHeaderError(error, IOError): +class TrueAudioHeaderError(error): pass class TrueAudioInfo(StreamInfo): - """True Audio stream information. + """TrueAudioInfo() + + True Audio stream information. 
Attributes: - - * length - audio length, in seconds - * sample_rate - audio sample rate, in Hz + length (`float`): audio length, in seconds + sample_rate (`int`): audio sample rate, in Hz """ + @convert_error(IOError, TrueAudioHeaderError) def __init__(self, fileobj, offset): + """Raises TrueAudioHeaderError""" + fileobj.seek(offset or 0) header = fileobj.read(18) if len(header) != 18 or not header.startswith(b"TTA"): @@ -50,15 +54,22 @@ class TrueAudioInfo(StreamInfo): self.length = float(samples) / self.sample_rate def pprint(self): - return "True Audio, %.2f seconds, %d Hz." % ( + return u"True Audio, %.2f seconds, %d Hz." % ( self.length, self.sample_rate) class TrueAudio(ID3FileType): - """A True Audio file. + """TrueAudio(filething, ID3=None) - :ivar info: :class:`TrueAudioInfo` - :ivar tags: :class:`ID3 ` + A True Audio file. + + Arguments: + filething (filething) + ID3 (mutagen.id3.ID3) + + Attributes: + info (`TrueAudioInfo`) + tags (`mutagen.id3.ID3`) """ _Info = TrueAudioInfo @@ -74,10 +85,17 @@ Open = TrueAudio class EasyTrueAudio(TrueAudio): - """Like MP3, but uses EasyID3 for tags. + """EasyTrueAudio(filething, ID3=None) - :ivar info: :class:`TrueAudioInfo` - :ivar tags: :class:`EasyID3 ` + Like MP3, but uses EasyID3 for tags. + + Arguments: + filething (filething) + ID3 (mutagen.id3.ID3) + + Attributes: + info (`TrueAudioInfo`) + tags (`mutagen.easyid3.EasyID3`) """ from mutagen.easyid3 import EasyID3 as ID3 diff --git a/lib/mutagen/wavpack.py b/lib/mutagen/wavpack.py old mode 100644 new mode 100755 index 1ef6ef43..290b90c3 --- a/lib/mutagen/wavpack.py +++ b/lib/mutagen/wavpack.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - # Copyright 2006 Joe Wreschnig # 2014 Christoph Reiter # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
+# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. """WavPack reading and writing. @@ -21,7 +21,7 @@ __all__ = ["WavPack", "Open", "delete"] from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete -from mutagen._util import cdata +from mutagen._util import cdata, convert_error class WavPackHeaderError(error): @@ -47,6 +47,7 @@ class _WavPackHeader(object): self.crc = crc @classmethod + @convert_error(IOError, WavPackHeaderError) def from_fileobj(cls, fileobj): """A new _WavPackHeader or raises WavPackHeaderError""" @@ -74,11 +75,10 @@ class WavPackInfo(StreamInfo): """WavPack stream information. Attributes: - - * channels - number of audio channels (1 or 2) - * length - file length in seconds, as a float - * sample_rate - audio sampling rate in Hz - * version - WavPack stream version + channels (int): number of audio channels (1 or 2) + length (float: file length in seconds, as a float + sample_rate (int): audio sampling rate in Hz + version (int) WavPack stream version """ def __init__(self, fileobj): @@ -109,7 +109,8 @@ class WavPackInfo(StreamInfo): self.length = float(samples) / self.sample_rate def pprint(self): - return "WavPack, %.2f seconds, %d Hz" % (self.length, self.sample_rate) + return u"WavPack, %.2f seconds, %d Hz" % (self.length, + self.sample_rate) class WavPack(APEv2File):