diff --git a/lib/mutagen/README.rst b/lib/mutagen/README.rst new file mode 100644 index 00000000..7ea1b21e --- /dev/null +++ b/lib/mutagen/README.rst @@ -0,0 +1,58 @@ +Mutagen +======= + +Mutagen is a Python module to handle audio metadata. It supports ASF, FLAC, +M4A, Monkey's Audio, MP3, Musepack, Ogg Opus, Ogg FLAC, Ogg Speex, Ogg +Theora, Ogg Vorbis, True Audio, WavPack, OptimFROG, and AIFF audio files. +All versions of ID3v2 are supported, and all standard ID3v2.4 frames are +parsed. It can read Xing headers to accurately calculate the bitrate and +length of MP3s. ID3 and APEv2 tags can be edited regardless of audio +format. It can also manipulate Ogg streams on an individual packet/page +level. + +Mutagen works on Python 2.6, 2.7, 3.3, 3.4 (CPython and PyPy) and has no +dependencies outside the Python standard library. + + +Installing +---------- + + $ ./setup.py build + $ su -c "./setup.py install" + + +Documentation +------------- + +The primary documentation for Mutagen is the doc strings found in +the source code and the sphinx documentation in the docs/ directory. + +To build the docs (needs sphinx): + + $ ./setup.py build_sphinx + +The tools/ directory contains several useful examples. + +The docs are also hosted on readthedocs.org: + + http://mutagen.readthedocs.org + + +Testing the Module +------------------ + +To test Mutagen's MP3 reading support, run + $ tools/mutagen-pony <your top-level MP3 directory here> +Mutagen will try to load all of them, and report any errors. + +To look at the tags in files, run + $ tools/mutagen-inspect filename ...
+ +To run our test suite, + $ ./setup.py test + + +Compatibility/Bugs +------------------ + +See docs/bugs.rst diff --git a/lib/mutagen/__init__.py b/lib/mutagen/__init__.py index 28febab3..83b47e5f 100644 --- a/lib/mutagen/__init__.py +++ b/lib/mutagen/__init__.py @@ -1,4 +1,5 @@ -# mutagen aims to be an all purpose media tagging library +# -*- coding: utf-8 -*- + # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify @@ -6,7 +7,7 @@ # published by the Free Software Foundation. -"""Mutagen aims to be an all purpose tagging library. +"""Mutagen aims to be an all purpose multimedia tagging library. :: @@ -19,245 +20,22 @@ depending on tag or format. They may also be entirely different objects for certain keys, again depending on format. """ -version = (1, 22) +from mutagen._util import MutagenError +from mutagen._file import FileType, StreamInfo, File +from mutagen._tags import Metadata + +version = (1, 27) """Version tuple.""" version_string = ".".join(map(str, version)) """Version string.""" +MutagenError -import warnings +FileType -import mutagen._util +StreamInfo +File -class Metadata(object): - """An abstract dict-like object. - - Metadata is the base class for many of the tag objects in Mutagen. - """ - - def __init__(self, *args, **kwargs): - if args or kwargs: - self.load(*args, **kwargs) - - def load(self, *args, **kwargs): - raise NotImplementedError - - def save(self, filename=None): - """Save changes to a file.""" - - raise NotImplementedError - - def delete(self, filename=None): - """Remove tags from a file.""" - - raise NotImplementedError - - -class FileType(mutagen._util.DictMixin): - """An abstract object wrapping tags and audio stream information. - - Attributes: - - * info -- stream information (length, bitrate, sample rate) - * tags -- metadata tags, if any - - Each file format has different potential tags and stream - information. 
- - FileTypes implement an interface very similar to Metadata; the - dict interface, save, load, and delete calls on a FileType call - the appropriate methods on its tag data. - """ - - info = None - tags = None - filename = None - _mimes = ["application/octet-stream"] - - def __init__(self, filename=None, *args, **kwargs): - if filename is None: - warnings.warn("FileType constructor requires a filename", - DeprecationWarning) - else: - self.load(filename, *args, **kwargs) - - def load(self, filename, *args, **kwargs): - raise NotImplementedError - - def __getitem__(self, key): - """Look up a metadata tag key. - - If the file has no tags at all, a KeyError is raised. - """ - - if self.tags is None: - raise KeyError(key) - else: - return self.tags[key] - - def __setitem__(self, key, value): - """Set a metadata tag. - - If the file has no tags, an appropriate format is added (but - not written until save is called). - """ - - if self.tags is None: - self.add_tags() - self.tags[key] = value - - def __delitem__(self, key): - """Delete a metadata tag key. - - If the file has no tags at all, a KeyError is raised. - """ - - if self.tags is None: - raise KeyError(key) - else: - del(self.tags[key]) - - def keys(self): - """Return a list of keys in the metadata tag. - - If the file has no tags at all, an empty list is returned. - """ - - if self.tags is None: - return [] - else: - return self.tags.keys() - - def delete(self, filename=None): - """Remove tags from a file.""" - - if self.tags is not None: - if filename is None: - filename = self.filename - else: - warnings.warn( - "delete(filename=...) is deprecated, reload the file", - DeprecationWarning) - return self.tags.delete(filename) - - def save(self, filename=None, **kwargs): - """Save metadata tags.""" - - if filename is None: - filename = self.filename - else: - warnings.warn( - "save(filename=...) 
is deprecated, reload the file", - DeprecationWarning) - if self.tags is not None: - return self.tags.save(filename, **kwargs) - else: - raise ValueError("no tags in file") - - def pprint(self): - """Print stream information and comment key=value pairs.""" - - stream = "%s (%s)" % (self.info.pprint(), self.mime[0]) - try: - tags = self.tags.pprint() - except AttributeError: - return stream - else: - return stream + ((tags and "\n" + tags) or "") - - def add_tags(self): - """Adds new tags to the file. - - Raises if tags already exist. - """ - - raise NotImplementedError - - @property - def mime(self): - """A list of mime types""" - - mimes = [] - for Kind in type(self).__mro__: - for mime in getattr(Kind, '_mimes', []): - if mime not in mimes: - mimes.append(mime) - return mimes - - @staticmethod - def score(filename, fileobj, header): - raise NotImplementedError - - -def File(filename, options=None, easy=False): - """Guess the type of the file and try to open it. - - The file type is decided by several things, such as the first 128 - bytes (which usually contains a file type identifier), the - filename extension, and the presence of existing tags. - - If no appropriate type could be found, None is returned. - - :param options: Sequence of :class:`FileType` implementations, defaults to - all included ones. - - :param easy: If the easy wrappers should be returnd if available. - For example :class:`EasyMP3 ` instead - of :class:`MP3 `. 
- """ - - if options is None: - from mutagen.asf import ASF - from mutagen.apev2 import APEv2File - from mutagen.flac import FLAC - if easy: - from mutagen.easyid3 import EasyID3FileType as ID3FileType - else: - from mutagen.id3 import ID3FileType - if easy: - from mutagen.mp3 import EasyMP3 as MP3 - else: - from mutagen.mp3 import MP3 - from mutagen.oggflac import OggFLAC - from mutagen.oggspeex import OggSpeex - from mutagen.oggtheora import OggTheora - from mutagen.oggvorbis import OggVorbis - from mutagen.oggopus import OggOpus - if easy: - from mutagen.trueaudio import EasyTrueAudio as TrueAudio - else: - from mutagen.trueaudio import TrueAudio - from mutagen.wavpack import WavPack - if easy: - from mutagen.easymp4 import EasyMP4 as MP4 - else: - from mutagen.mp4 import MP4 - from mutagen.musepack import Musepack - from mutagen.monkeysaudio import MonkeysAudio - from mutagen.optimfrog import OptimFROG - options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC, - FLAC, APEv2File, MP4, ID3FileType, WavPack, Musepack, - MonkeysAudio, OptimFROG, ASF, OggOpus] - - if not options: - return None - - fileobj = open(filename, "rb") - try: - header = fileobj.read(128) - # Sort by name after score. Otherwise import order affects - # Kind sort order, which affects treatment of things with - # equals scores. 
- results = [(Kind.score(filename, fileobj, header), Kind.__name__) - for Kind in options] - finally: - fileobj.close() - results = zip(results, options) - results.sort() - (score, name), Kind = results[-1] - if score > 0: - return Kind(filename) - else: - return None +Metadata diff --git a/lib/mutagen/_compat.py b/lib/mutagen/_compat.py new file mode 100644 index 00000000..fa01f72e --- /dev/null +++ b/lib/mutagen/_compat.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2013 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. + +import sys + + +PY2 = sys.version_info[0] == 2 +PY3 = not PY2 + +if PY2: + from StringIO import StringIO + BytesIO = StringIO + from cStringIO import StringIO as cBytesIO + + long_ = long + integer_types = (int, long) + string_types = (str, unicode) + text_type = unicode + + xrange = xrange + cmp = cmp + chr_ = chr + + def endswith(text, end): + return text.endswith(end) + + iteritems = lambda d: d.iteritems() + itervalues = lambda d: d.itervalues() + iterkeys = lambda d: d.iterkeys() + + iterbytes = lambda b: iter(b) + + exec("def reraise(tp, value, tb):\n raise tp, value, tb") + + def swap_to_string(cls): + if "__str__" in cls.__dict__: + cls.__unicode__ = cls.__str__ + + if "__bytes__" in cls.__dict__: + cls.__str__ = cls.__bytes__ + + return cls + +elif PY3: + from io import StringIO + StringIO = StringIO + from io import BytesIO + cBytesIO = BytesIO + + long_ = int + integer_types = (int,) + string_types = (str,) + text_type = str + + xrange = range + cmp = lambda a, b: (a > b) - (a < b) + chr_ = lambda x: bytes([x]) + + def endswith(text, end): + # useful for paths, which can be either str or bytes + if isinstance(text, str): + if not isinstance(end, str): + end = end.decode("ascii") + else: + if not isinstance(end, bytes): + end = end.encode("ascii") + return
text.endswith(end) + + iteritems = lambda d: iter(d.items()) + itervalues = lambda d: iter(d.values()) + iterkeys = lambda d: iter(d.keys()) + + iterbytes = lambda b: (bytes([v]) for v in b) + + def reraise(tp, value, tb): + raise tp(value).with_traceback(tb) + + def swap_to_string(cls): + return cls diff --git a/lib/mutagen/_constants.py b/lib/mutagen/_constants.py index f5ecd90c..62c1ce02 100644 --- a/lib/mutagen/_constants.py +++ b/lib/mutagen/_constants.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + """Constants used by Mutagen.""" GENRES = [ diff --git a/lib/mutagen/_file.py b/lib/mutagen/_file.py new file mode 100644 index 00000000..fc1caeb9 --- /dev/null +++ b/lib/mutagen/_file.py @@ -0,0 +1,237 @@ +# Copyright (C) 2005 Michael Urman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. + +import warnings + +from mutagen._util import DictMixin + + +class FileType(DictMixin): + """An abstract object wrapping tags and audio stream information. + + Attributes: + + * info -- stream information (length, bitrate, sample rate) + * tags -- metadata tags, if any + + Each file format has different potential tags and stream + information. + + FileTypes implement an interface very similar to Metadata; the + dict interface, save, load, and delete calls on a FileType call + the appropriate methods on its tag data. + """ + + __module__ = "mutagen" + + info = None + tags = None + filename = None + _mimes = ["application/octet-stream"] + + def __init__(self, filename=None, *args, **kwargs): + if filename is None: + warnings.warn("FileType constructor requires a filename", + DeprecationWarning) + else: + self.load(filename, *args, **kwargs) + + def load(self, filename, *args, **kwargs): + raise NotImplementedError + + def __getitem__(self, key): + """Look up a metadata tag key. + + If the file has no tags at all, a KeyError is raised. 
+ """ + + if self.tags is None: + raise KeyError(key) + else: + return self.tags[key] + + def __setitem__(self, key, value): + """Set a metadata tag. + + If the file has no tags, an appropriate format is added (but + not written until save is called). + """ + + if self.tags is None: + self.add_tags() + self.tags[key] = value + + def __delitem__(self, key): + """Delete a metadata tag key. + + If the file has no tags at all, a KeyError is raised. + """ + + if self.tags is None: + raise KeyError(key) + else: + del(self.tags[key]) + + def keys(self): + """Return a list of keys in the metadata tag. + + If the file has no tags at all, an empty list is returned. + """ + + if self.tags is None: + return [] + else: + return self.tags.keys() + + def delete(self, filename=None): + """Remove tags from a file.""" + + if self.tags is not None: + if filename is None: + filename = self.filename + else: + warnings.warn( + "delete(filename=...) is deprecated, reload the file", + DeprecationWarning) + return self.tags.delete(filename) + + def save(self, filename=None, **kwargs): + """Save metadata tags.""" + + if filename is None: + filename = self.filename + else: + warnings.warn( + "save(filename=...) is deprecated, reload the file", + DeprecationWarning) + if self.tags is not None: + return self.tags.save(filename, **kwargs) + else: + raise ValueError("no tags in file") + + def pprint(self): + """Print stream information and comment key=value pairs.""" + + stream = "%s (%s)" % (self.info.pprint(), self.mime[0]) + try: + tags = self.tags.pprint() + except AttributeError: + return stream + else: + return stream + ((tags and "\n" + tags) or "") + + def add_tags(self): + """Adds new tags to the file. + + Raises if tags already exist. 
+ """ + + raise NotImplementedError + + @property + def mime(self): + """A list of mime types""" + + mimes = [] + for Kind in type(self).__mro__: + for mime in getattr(Kind, '_mimes', []): + if mime not in mimes: + mimes.append(mime) + return mimes + + @staticmethod + def score(filename, fileobj, header): + raise NotImplementedError + + +class StreamInfo(object): + """Abstract stream information object. + + Provides attributes for length, bitrate, sample rate etc. + + See the implementations for details. + """ + + __module__ = "mutagen" + + def pprint(self): + """Print stream information""" + + raise NotImplementedError + + +def File(filename, options=None, easy=False): + """Guess the type of the file and try to open it. + + The file type is decided by several things, such as the first 128 + bytes (which usually contain a file type identifier), the + filename extension, and the presence of existing tags. + + If no appropriate type could be found, None is returned. + + :param options: Sequence of :class:`FileType` implementations, defaults to + all included ones. + + :param easy: If the easy wrappers should be returned if available. + For example :class:`EasyMP3 <mutagen.mp3.EasyMP3>` instead + of :class:`MP3 <mutagen.mp3.MP3>`.
+ """ + + if options is None: + from mutagen.asf import ASF + from mutagen.apev2 import APEv2File + from mutagen.flac import FLAC + if easy: + from mutagen.easyid3 import EasyID3FileType as ID3FileType + else: + from mutagen.id3 import ID3FileType + if easy: + from mutagen.mp3 import EasyMP3 as MP3 + else: + from mutagen.mp3 import MP3 + from mutagen.oggflac import OggFLAC + from mutagen.oggspeex import OggSpeex + from mutagen.oggtheora import OggTheora + from mutagen.oggvorbis import OggVorbis + from mutagen.oggopus import OggOpus + if easy: + from mutagen.trueaudio import EasyTrueAudio as TrueAudio + else: + from mutagen.trueaudio import TrueAudio + from mutagen.wavpack import WavPack + if easy: + from mutagen.easymp4 import EasyMP4 as MP4 + else: + from mutagen.mp4 import MP4 + from mutagen.musepack import Musepack + from mutagen.monkeysaudio import MonkeysAudio + from mutagen.optimfrog import OptimFROG + from mutagen.aiff import AIFF + from mutagen.aac import AAC + options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC, + FLAC, AIFF, APEv2File, MP4, ID3FileType, WavPack, + Musepack, MonkeysAudio, OptimFROG, ASF, OggOpus, AAC] + + if not options: + return None + + fileobj = open(filename, "rb") + try: + header = fileobj.read(128) + # Sort by name after score. Otherwise import order affects + # Kind sort order, which affects treatment of things with + # equals scores. 
+ results = [(Kind.score(filename, fileobj, header), Kind.__name__) + for Kind in options] + finally: + fileobj.close() + results = list(zip(results, options)) + results.sort() + (score, name), Kind = results[-1] + if score > 0: + return Kind(filename) + else: + return None diff --git a/lib/mutagen/_tags.py b/lib/mutagen/_tags.py new file mode 100644 index 00000000..8f9b9aa9 --- /dev/null +++ b/lib/mutagen/_tags.py @@ -0,0 +1,31 @@ +# Copyright (C) 2005 Michael Urman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. + + +class Metadata(object): + """An abstract dict-like object. + + Metadata is the base class for many of the tag objects in Mutagen. + """ + + __module__ = "mutagen" + + def __init__(self, *args, **kwargs): + if args or kwargs: + self.load(*args, **kwargs) + + def load(self, *args, **kwargs): + raise NotImplementedError + + def save(self, filename=None): + """Save changes to a file.""" + + raise NotImplementedError + + def delete(self, filename=None): + """Remove tags from a file.""" + + raise NotImplementedError diff --git a/lib/mutagen/_util.py b/lib/mutagen/_util.py index 2c8e1a56..570744b1 100644 --- a/lib/mutagen/_util.py +++ b/lib/mutagen/_util.py @@ -1,4 +1,6 @@ -# Copyright 2006 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,10 +13,76 @@ intended for internal use in Mutagen only. """ import struct +import codecs from fnmatch import fnmatchcase +from ._compat import chr_, text_type, PY2, iteritems, iterbytes, \ + integer_types, xrange + +class MutagenError(Exception): + """Base class for all custom exceptions in mutagen + + .. 
versionadded:: 1.25 + """ + + +def total_ordering(cls): + assert "__eq__" in cls.__dict__ + assert "__lt__" in cls.__dict__ + + cls.__le__ = lambda self, other: self == other or self < other + cls.__gt__ = lambda self, other: not (self == other or self < other) + cls.__ge__ = lambda self, other: not self < other + cls.__ne__ = lambda self, other: not self.__eq__(other) + + return cls + + +def hashable(cls): + """Makes sure the class is hashable. + + Needs a working __eq__ and __hash__ and will add a __ne__. + """ + + # py2 + assert "__hash__" in cls.__dict__ + # py3 + assert cls.__dict__["__hash__"] is not None + assert "__eq__" in cls.__dict__ + + cls.__ne__ = lambda self, other: not self.__eq__(other) + + return cls + + +def enum(cls): + assert cls.__bases__ == (object,) + + d = dict(cls.__dict__) + new_type = type(cls.__name__, (int,), d) + new_type.__module__ = cls.__module__ + + map_ = {} + for key, value in iteritems(d): + if key.upper() == key and isinstance(value, integer_types): + value_instance = new_type(value) + setattr(new_type, key, value_instance) + map_[value] = key + + def repr_(self): + if self in map_: + return "%s.%s" % (type(self).__name__, map_[self]) + else: + return "%s(%s)" % (type(self).__name__, self) + + setattr(new_type, "__repr__", repr_) + + return new_type + + +@total_ordering class DictMixin(object): """Implement the dict API using keys() and __*item__ methods. 
@@ -33,27 +101,37 @@ class DictMixin(object): def __iter__(self): return iter(self.keys()) - def has_key(self, key): + def __has_key(self, key): try: self[key] except KeyError: return False else: return True - __contains__ = has_key - iterkeys = lambda self: iter(self.keys()) + if PY2: + has_key = __has_key + + __contains__ = __has_key + + if PY2: + iterkeys = lambda self: iter(self.keys()) def values(self): - return map(self.__getitem__, self.keys()) - itervalues = lambda self: iter(self.values()) + return [self[k] for k in self.keys()] + + if PY2: + itervalues = lambda self: iter(self.values()) def items(self): - return zip(self.keys(), self.values()) - iteritems = lambda s: iter(s.items()) + return list(zip(self.keys(), self.values())) + + if PY2: + iteritems = lambda s: iter(s.items()) def clear(self): - map(self.__delitem__, self.keys()) + for key in list(self.keys()): + self.__delitem__(key) def pop(self, key, *args): if len(args) > 1: @@ -69,11 +147,11 @@ class DictMixin(object): return value def popitem(self): - try: - key = self.keys()[0] - return key, self.pop(key) - except IndexError: + for key in self.keys(): + break + else: raise KeyError("dictionary is empty") + return key, self.pop(key) def update(self, other=None, **kwargs): if other is None: @@ -81,7 +159,8 @@ class DictMixin(object): other = {} try: - map(self.__setitem__, other.keys(), other.values()) + for key, value in other.items(): + self.__setitem__(key, value) except AttributeError: for key, value in other: self[key] = value @@ -102,11 +181,11 @@ class DictMixin(object): def __repr__(self): return repr(dict(self.items())) - def __cmp__(self, other): - if other is None: - return 1 - else: - return cmp(dict(self.items()), other) + def __eq__(self, other): + return dict(self.items()) == other + + def __lt__(self, other): + return dict(self.items()) < other __hash__ = object.__hash__ @@ -132,56 +211,63 @@ class DictProxy(DictMixin): return self.__dict.keys() +def _fill_cdata(cls): + """Add 
struct pack/unpack functions""" + + funcs = {} + for key, name in [("b", "char"), ("h", "short"), + ("i", "int"), ("q", "longlong")]: + for echar, esuffix in [("<", "le"), (">", "be")]: + esuffix = "_" + esuffix + for unsigned in [True, False]: + s = struct.Struct(echar + (key.upper() if unsigned else key)) + get_wrapper = lambda f: lambda *a, **k: f(*a, **k)[0] + unpack = get_wrapper(s.unpack) + unpack_from = get_wrapper(s.unpack_from) + + def get_unpack_from(s): + def unpack_from(data, offset=0): + return s.unpack_from(data, offset)[0], offset + s.size + return unpack_from + + unpack_from = get_unpack_from(s) + pack = s.pack + + prefix = "u" if unsigned else "" + if s.size == 1: + esuffix = "" + bits = str(s.size * 8) + funcs["%s%s%s" % (prefix, name, esuffix)] = unpack + funcs["%sint%s%s" % (prefix, bits, esuffix)] = unpack + funcs["%s%s%s_from" % (prefix, name, esuffix)] = unpack_from + funcs["%sint%s%s_from" % (prefix, bits, esuffix)] = unpack_from + funcs["to_%s%s%s" % (prefix, name, esuffix)] = pack + funcs["to_%sint%s%s" % (prefix, bits, esuffix)] = pack + + for key, func in iteritems(funcs): + setattr(cls, key, staticmethod(func)) + + class cdata(object): - """C character buffer to Python numeric type conversions.""" + """C character buffer to Python numeric type conversions. 
+ + For each size/sign/endianness: + uint32_le(data)/to_uint32_le(num)/uint32_le_from(data, offset=0) + """ from struct import error error = error - short_le = staticmethod(lambda data: struct.unpack('h', data)[0]) - ushort_be = staticmethod(lambda data: struct.unpack('>H', data)[0]) - - int_le = staticmethod(lambda data: struct.unpack('i', data)[0]) - uint_be = staticmethod(lambda data: struct.unpack('>I', data)[0]) - - longlong_le = staticmethod(lambda data: struct.unpack('q', data)[0]) - ulonglong_be = staticmethod(lambda data: struct.unpack('>Q', data)[0]) - - to_short_le = staticmethod(lambda data: struct.pack('h', data)) - to_ushort_be = staticmethod(lambda data: struct.pack('>H', data)) - - to_int_le = staticmethod(lambda data: struct.pack('i', data)) - to_uint_be = staticmethod(lambda data: struct.pack('>I', data)) - - to_longlong_le = staticmethod(lambda data: struct.pack('q', data)) - to_ulonglong_be = staticmethod(lambda data: struct.pack('>Q', data)) - - bitswap = ''.join([chr(sum([((val >> i) & 1) << (7-i) for i in range(8)])) - for val in range(256)]) - del(i) - del(val) + bitswap = b''.join( + chr_(sum(((val >> i) & 1) << (7 - i) for i in range(8))) + for val in range(256)) test_bit = staticmethod(lambda value, n: bool((value >> n) & 1)) +_fill_cdata(cdata) + + def lock(fileobj): """Lock a file object 'safely'. @@ -223,7 +309,7 @@ def unlock(fileobj): fcntl.lockf(fileobj, fcntl.LOCK_UN) -def insert_bytes(fobj, size, offset, BUFFER_SIZE=2**16): +def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): """Insert size bytes of empty space starting at offset. 
fobj must be an open file object, open rb+ or @@ -237,16 +323,16 @@ def insert_bytes(fobj, size, offset, BUFFER_SIZE=2**16): fobj.seek(0, 2) filesize = fobj.tell() movesize = filesize - offset - fobj.write('\x00' * size) + fobj.write(b'\x00' * size) fobj.flush() try: try: import mmap - map = mmap.mmap(fobj.fileno(), filesize + size) + file_map = mmap.mmap(fobj.fileno(), filesize + size) try: - map.move(offset + size, offset, movesize) + file_map.move(offset + size, offset, movesize) finally: - map.close() + file_map.close() except (ValueError, EnvironmentError, ImportError): # handle broken mmap scenarios locked = lock(fobj) @@ -258,7 +344,7 @@ def insert_bytes(fobj, size, offset, BUFFER_SIZE=2**16): # the file out several megs. while padsize: addsize = min(BUFFER_SIZE, padsize) - fobj.write("\x00" * addsize) + fobj.write(b"\x00" * addsize) padsize -= addsize fobj.seek(filesize, 0) @@ -285,7 +371,7 @@ def insert_bytes(fobj, size, offset, BUFFER_SIZE=2**16): unlock(fobj) -def delete_bytes(fobj, size, offset, BUFFER_SIZE=2**16): +def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): """Delete size bytes of empty space starting at offset. 
fobj must be an open file object, open rb+ or @@ -305,11 +391,11 @@ def delete_bytes(fobj, size, offset, BUFFER_SIZE=2**16): fobj.flush() try: import mmap - map = mmap.mmap(fobj.fileno(), filesize) + file_map = mmap.mmap(fobj.fileno(), filesize) try: - map.move(offset, offset + size, movesize) + file_map.move(offset, offset + size, movesize) finally: - map.close() + file_map.close() except (ValueError, EnvironmentError, ImportError): # handle broken mmap scenarios locked = lock(fobj) @@ -328,22 +414,190 @@ def delete_bytes(fobj, size, offset, BUFFER_SIZE=2**16): unlock(fobj) -def utf8(data): - """Convert a basestring to a valid UTF-8 str.""" - - if isinstance(data, str): - return data.decode("utf-8", "replace").encode("utf-8") - elif isinstance(data, unicode): - return data.encode("utf-8") - else: - raise TypeError("only unicode/str types can be converted to UTF-8") - - def dict_match(d, key, default=None): - try: + """Like __getitem__ but works as if the keys() are all filename patterns. + Returns the value of any dict key that matches the passed key. + """ + + if key in d and "[" not in key: return d[key] - except KeyError: - for pattern, value in d.iteritems(): + else: + for pattern, value in iteritems(d): if fnmatchcase(key, pattern): return value return default + + +def decode_terminated(data, encoding, strict=True): + """Returns the decoded data until the first NULL terminator + and all data after it. + + In case the data can't be decoded raises UnicodeError. + In case the encoding is not found raises LookupError. + In case the data isn't null terminated (even if it is encoded correctly) + raises ValueError except if strict is False, then the decoded string + will be returned anyway. 
+ """ + + codec_info = codecs.lookup(encoding) + + # normalize encoding name so we can compare by name + encoding = codec_info.name + + # fast path + if encoding in ("utf-8", "iso8859-1"): + index = data.find(b"\x00") + if index == -1: + # make sure we raise UnicodeError first, like in the slow path + res = data.decode(encoding), b"" + if strict: + raise ValueError("not null terminated") + else: + return res + return data[:index].decode(encoding), data[index + 1:] + + # slow path + decoder = codec_info.incrementaldecoder() + r = [] + for i, b in enumerate(iterbytes(data)): + c = decoder.decode(b) + if c == u"\x00": + return u"".join(r), data[i + 1:] + r.append(c) + else: + # make sure the decoder is finished + r.append(decoder.decode(b"", True)) + if strict: + raise ValueError("not null terminated") + return u"".join(r), b"" + + +def split_escape(string, sep, maxsplit=None, escape_char="\\"): + """Like unicode/str/bytes.split but allows for the separator to be escaped + + If passed unicode/str/bytes will only return list of unicode/str/bytes. 
+ """ + + assert len(sep) == 1 + assert len(escape_char) == 1 + + if isinstance(string, bytes): + if isinstance(escape_char, text_type): + escape_char = escape_char.encode("ascii") + iter_ = iterbytes + else: + iter_ = iter + + if maxsplit is None: + maxsplit = len(string) + + empty = string[:0] + result = [] + current = empty + escaped = False + for char in iter_(string): + if escaped: + if char != escape_char and char != sep: + current += escape_char + current += char + escaped = False + else: + if char == escape_char: + escaped = True + elif char == sep and len(result) < maxsplit: + result.append(current) + current = empty + else: + current += char + result.append(current) + return result + + +class BitReaderError(Exception): + pass + + +class BitReader(object): + + def __init__(self, fileobj): + self._fileobj = fileobj + self._buffer = 0 + self._bits = 0 + self._pos = fileobj.tell() + + def bits(self, count): + """Reads `count` bits and returns an uint, MSB read first. + + May raise BitReaderError if not enough data could be read or + IOError by the underlying file object. + """ + + if count < 0: + raise ValueError + + if count > self._bits: + n_bytes = (count - self._bits + 7) // 8 + data = self._fileobj.read(n_bytes) + if len(data) != n_bytes: + raise BitReaderError("not enough data") + for b in bytearray(data): + self._buffer = (self._buffer << 8) | b + self._bits += n_bytes * 8 + + self._bits -= count + value = self._buffer >> self._bits + self._buffer &= (1 << self._bits) - 1 + assert self._bits < 8 + return value + + def bytes(self, count): + """Returns a bytearray of length `count`. Works unaligned.""" + + if count < 0: + raise ValueError + + # fast path + if self._bits == 0: + data = self._fileobj.read(count) + if len(data) != count: + raise BitReaderError("not enough data") + return data + + return bytes(bytearray(self.bits(8) for _ in xrange(count))) + + def skip(self, count): + """Skip `count` bits. 
+ + Might raise BitReaderError if there wasn't enough data to skip, + but might also fail on the next bits() instead. + """ + + if count < 0: + raise ValueError + + if count <= self._bits: + self.bits(count) + else: + count -= self.align() + n_bytes = count // 8 + self._fileobj.seek(n_bytes, 1) + count -= n_bytes * 8 + self.bits(count) + + def get_position(self): + """Returns the amount of bits read or skipped so far""" + + return (self._fileobj.tell() - self._pos) * 8 - self._bits + + def align(self): + """Align to the next byte, returns the amount of bits skipped""" + + bits = self._bits + self._buffer = 0 + self._bits = 0 + return bits + + def is_aligned(self): + """If we are currently aligned to bytes and nothing is buffered""" + + return self._bits == 0 diff --git a/lib/mutagen/_vorbis.py b/lib/mutagen/_vorbis.py index 4ee8da4a..b5cd9254 100644 --- a/lib/mutagen/_vorbis.py +++ b/lib/mutagen/_vorbis.py @@ -1,5 +1,7 @@ -# Vorbis comment support for Mutagen -# Copyright 2005-2006 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2005-2006 Joe Wreschnig +# 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as @@ -16,9 +18,8 @@ The specification is at http://www.xiph.org/vorbis/doc/v-comment.html. import sys -from cStringIO import StringIO - import mutagen +from ._compat import reraise, BytesIO, text_type, xrange, PY3, PY2 from mutagen._util import DictMixin, cdata @@ -27,13 +28,20 @@ def is_valid_key(key): Valid Vorbis comment keys are printable ASCII between 0x20 (space) and 0x7D ('}'), excluding '='. + + Takes str/unicode in Python 2, unicode in Python 3 """ + + if PY3 and isinstance(key, bytes): + raise TypeError("needs to be str not bytes") + for c in key: if c < " " or c > "}" or c == "=": return False else: return bool(key) + istag = is_valid_key @@ -60,7 +68,8 @@ class VComment(mutagen.Metadata, list): file-like object, not a filename. 
Attributes: - vendor -- the stream 'vendor' (i.e. writer); default 'Mutagen' + + * vendor -- the stream 'vendor' (i.e. writer); default 'Mutagen' """ vendor = u"Mutagen " + mutagen.version_string @@ -70,25 +79,26 @@ class VComment(mutagen.Metadata, list): # override just load and get equivalent magic for the # constructor. if data is not None: - if isinstance(data, str): - data = StringIO(data) + if isinstance(data, bytes): + data = BytesIO(data) elif not hasattr(data, 'read'): - raise TypeError("VComment requires string data or a file-like") + raise TypeError("VComment requires bytes or a file-like") self.load(data, *args, **kwargs) def load(self, fileobj, errors='replace', framing=True): """Parse a Vorbis comment from a file-like object. Keyword arguments: - errors: - 'strict', 'replace', or 'ignore'. This affects Unicode decoding - and how other malformed content is interpreted. - framing -- if true, fail if a framing bit is not present + + * errors: + 'strict', 'replace', or 'ignore'. This affects Unicode decoding + and how other malformed content is interpreted. + * framing -- if true, fail if a framing bit is not present Framing bits are required by the Vorbis comment specification, but are not used in FLAC Vorbis comment blocks. 
- """ + try: vendor_length = cdata.uint_le(fileobj.read(4)) self.vendor = fileobj.read(vendor_length).decode('utf-8', errors) @@ -101,21 +111,25 @@ class VComment(mutagen.Metadata, list): raise error("cannot read %d bytes, too large" % length) try: tag, value = string.split('=', 1) - except ValueError, err: + except ValueError as err: if errors == "ignore": continue elif errors == "replace": tag, value = u"unknown%d" % i, string else: - raise VorbisEncodingError, err, sys.exc_info()[2] + reraise(VorbisEncodingError, err, sys.exc_info()[2]) try: tag = tag.encode('ascii', errors) except UnicodeEncodeError: raise VorbisEncodingError("invalid tag name %r" % tag) else: + # string keys in py3k + if PY3: + tag = tag.decode("ascii") if is_valid_key(tag): self.append((tag, value)) - if framing and not ord(fileobj.read(1)) & 0x01: + + if framing and not bytearray(fileobj.read(1))[0] & 0x01: raise VorbisUnsetFrameError("framing bit was unset") except (cdata.error, TypeError): raise error("file is not a valid Vorbis comment") @@ -126,9 +140,14 @@ class VComment(mutagen.Metadata, list): Check to make sure every key used is a valid Vorbis key, and that every value used is a valid Unicode or UTF-8 string. If any invalid keys or values are found, a ValueError is raised. + + In Python 3 all keys and values have to be a string. 
""" - if not isinstance(self.vendor, unicode): + if not isinstance(self.vendor, text_type): + if PY3: + raise ValueError("vendor needs to be str") + try: self.vendor.decode('utf-8') except UnicodeDecodeError: @@ -138,19 +157,25 @@ class VComment(mutagen.Metadata, list): try: if not is_valid_key(key): raise ValueError - except: + except TypeError: raise ValueError("%r is not a valid key" % key) - if not isinstance(value, unicode): + + if not isinstance(value, text_type): + if PY3: + raise ValueError("%r needs to be str" % key) + try: - value.encode("utf-8") + value.decode("utf-8") except: raise ValueError("%r is not a valid value" % value) - else: - return True + + return True def clear(self): """Clear all keys from the comment.""" - del(self[:]) + + for i in list(self): + self.remove(i) def write(self, framing=True): """Return a string representation of the data. @@ -159,25 +184,41 @@ class VComment(mutagen.Metadata, list): invalid data may raise a ValueError. Keyword arguments: - framing -- if true, append a framing bit (see load) + + * framing -- if true, append a framing bit (see load) """ self.validate() - f = StringIO() - f.write(cdata.to_uint_le(len(self.vendor.encode('utf-8')))) - f.write(self.vendor.encode('utf-8')) + def _encode(value): + if not isinstance(value, bytes): + return value.encode('utf-8') + return value + + f = BytesIO() + vendor = _encode(self.vendor) + f.write(cdata.to_uint_le(len(vendor))) + f.write(vendor) f.write(cdata.to_uint_le(len(self))) for tag, value in self: - comment = "%s=%s" % (tag, value.encode('utf-8')) + tag = _encode(tag) + value = _encode(value) + comment = tag + b"=" + value f.write(cdata.to_uint_le(len(comment))) f.write(comment) if framing: - f.write("\x01") + f.write(b"\x01") return f.getvalue() def pprint(self): - return "\n".join(["%s=%s" % (k.lower(), v) for k, v in self]) + + def _decode(value): + if not isinstance(value, text_type): + return value.decode('utf-8', 'replace') + return value + + tags = [u"%s=%s" % 
(_decode(k), _decode(v)) for k, v in self] + return u"\n".join(tags) class VCommentDict(VComment, DictMixin): @@ -199,9 +240,17 @@ class VCommentDict(VComment, DictMixin): This is a copy, so comment['title'].append('a title') will not work. - """ - key = key.lower().encode('ascii') + + # PY3 only + if isinstance(key, slice): + return VComment.__getitem__(self, key) + + if not is_valid_key(key): + raise ValueError + + key = key.lower() + values = [value for (k, value) in self if k.lower() == key] if not values: raise KeyError(key) @@ -210,16 +259,29 @@ class VCommentDict(VComment, DictMixin): def __delitem__(self, key): """Delete all values associated with the key.""" - key = key.lower().encode('ascii') - to_delete = filter(lambda x: x[0].lower() == key, self) + + # PY3 only + if isinstance(key, slice): + return VComment.__delitem__(self, key) + + if not is_valid_key(key): + raise ValueError + + key = key.lower() + to_delete = [x for x in self if x[0].lower() == key] if not to_delete: raise KeyError(key) else: - map(self.remove, to_delete) + for item in to_delete: + self.remove(item) def __contains__(self, key): """Return true if the key has any values.""" - key = key.lower().encode('ascii') + + if not is_valid_key(key): + raise ValueError + + key = key.lower() for k, value in self: if k.lower() == key: return True @@ -232,23 +294,34 @@ class VCommentDict(VComment, DictMixin): Setting a value overwrites all old ones. The value may be a list of Unicode or UTF-8 strings, or a single Unicode or UTF-8 string. 
- """ - key = key.encode('ascii') + # PY3 only + if isinstance(key, slice): + return VComment.__setitem__(self, key, values) + + if not is_valid_key(key): + raise ValueError + if not isinstance(values, list): values = [values] try: del(self[key]) except KeyError: pass + + if PY2: + key = key.encode('ascii') + for value in values: self.append((key, value)) def keys(self): """Return all keys in the comment.""" - return self and list(set([k.lower() for k, v in self])) + + return list(set([k.lower() for k, v in self])) def as_dict(self): """Return a copy of the comment data in a real dict.""" + return dict([(key, self[key]) for key in self.keys()]) diff --git a/lib/mutagen/aac.py b/lib/mutagen/aac.py new file mode 100644 index 00000000..62488711 --- /dev/null +++ b/lib/mutagen/aac.py @@ -0,0 +1,407 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2014 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. + +""" +* ADTS - Audio Data Transport Stream +* ADIF - Audio Data Interchange Format +* See ISO/IEC 13818-7 / 14496-03 +""" + +from mutagen import StreamInfo +from mutagen._file import FileType +from mutagen._util import BitReader, BitReaderError, MutagenError +from mutagen._compat import endswith, xrange + + +_FREQS = [ + 96000, 88200, 64000, 48000, + 44100, 32000, 24000, 22050, + 16000, 12000, 11025, 8000, + 7350, +] + + +class _ADTSStream(object): + """Represents a series of frames belonging to the same stream""" + + parsed_frames = 0 + """Number of successfully parsed frames""" + + offset = 0 + """offset in bytes at which the stream starts (the first sync word)""" + + @classmethod + def find_stream(cls, fileobj, max_bytes): + """Returns a possibly valid _ADTSStream or None. 
+ + Args: + max_bytes (int): maximum bytes to read + """ + + r = BitReader(fileobj) + stream = cls(r) + if stream.sync(max_bytes): + stream.offset = (r.get_position() - 12) // 8 + return stream + + def sync(self, max_bytes): + """Find the next sync. + Returns True if found.""" + + # at least 2 bytes for the sync + max_bytes = max(max_bytes, 2) + + r = self._r + r.align() + while max_bytes > 0: + try: + b = r.bytes(1) + if b == b"\xff": + if r.bits(4) == 0xf: + return True + r.align() + max_bytes -= 2 + else: + max_bytes -= 1 + except BitReaderError: + return False + return False + + def __init__(self, r): + """Use _ADTSStream.find_stream to create a stream""" + + self._fixed_header_key = None + self._r = r + self.offset = -1 + self.parsed_frames = 0 + + self._samples = 0 + self._payload = 0 + self._start = r.get_position() / 8 + self._last = self._start + + @property + def bitrate(self): + """Bitrate of the raw aac blocks, excluding framing/crc""" + + assert self.parsed_frames, "no frame parsed yet" + + if self._samples == 0: + return 0 + + return (8 * self._payload * self.frequency) // self._samples + + @property + def samples(self): + """samples so far""" + + assert self.parsed_frames, "no frame parsed yet" + + return self._samples + + @property + def size(self): + """bytes read in the stream so far (including framing)""" + + assert self.parsed_frames, "no frame parsed yet" + + return self._last - self._start + + @property + def channels(self): + """0 means unknown""" + + assert self.parsed_frames, "no frame parsed yet" + + b_index = self._fixed_header_key[6] + if b_index == 7: + return 8 + elif b_index > 7: + return 0 + else: + return b_index + + @property + def frequency(self): + """0 means unknown""" + + assert self.parsed_frames, "no frame parsed yet" + + f_index = self._fixed_header_key[4] + try: + return _FREQS[f_index] + except IndexError: + return 0 + + def parse_frame(self): + """True if parsing was successful. 
+ Fails either because the frame wasn't valid or the stream ended. + """ + + try: + return self._parse_frame() + except BitReaderError: + return False + + def _parse_frame(self): + r = self._r + # start == position of sync word + start = r.get_position() - 12 + + # adts_fixed_header + id_ = r.bits(1) + layer = r.bits(2) + protection_absent = r.bits(1) + + profile = r.bits(2) + sampling_frequency_index = r.bits(4) + private_bit = r.bits(1) + # TODO: if 0 we could parse program_config_element() + channel_configuration = r.bits(3) + original_copy = r.bits(1) + home = r.bits(1) + + # the fixed header has to be the same for every frame in the stream + fixed_header_key = ( + id_, layer, protection_absent, profile, sampling_frequency_index, + private_bit, channel_configuration, original_copy, home, + ) + + if self._fixed_header_key is None: + self._fixed_header_key = fixed_header_key + else: + if self._fixed_header_key != fixed_header_key: + return False + + # adts_variable_header + r.skip(2) # copyright_identification_bit/start + frame_length = r.bits(13) + r.skip(11) # adts_buffer_fullness + nordbif = r.bits(2) + # adts_variable_header end + + crc_overhead = 0 + if not protection_absent: + crc_overhead += (nordbif + 1) * 16 + if nordbif != 0: + crc_overhead *= 2 + + left = (frame_length * 8) - (r.get_position() - start) + if left < 0: + return False + r.skip(left) + assert r.is_aligned() + + self._payload += (left - crc_overhead) / 8 + self._samples += (nordbif + 1) * 1024 + self._last = r.get_position() / 8 + + self.parsed_frames += 1 + return True + + +class ProgramConfigElement(object): + + element_instance_tag = None + object_type = None + sampling_frequency_index = None + channels = None + + def __init__(self, r): + """Reads the program_config_element() + + Raises BitReaderError + """ + + self.element_instance_tag = r.bits(4) + self.object_type = r.bits(2) + self.sampling_frequency_index = r.bits(4) + num_front_channel_elements = r.bits(4) + 
class AACInfo(StreamInfo):
    """AAC stream information.

    Attributes:

    * channels -- number of audio channels
    * length -- file length in seconds, as a float
    * sample_rate -- audio sampling rate in Hz
    * bitrate -- audio bitrate, in bits per second

    The length of the stream is just a guess and might not be correct.
    """

    channels = 0
    length = 0
    sample_rate = 0
    bitrate = 0

    def __init__(self, fileobj):
        """Parse stream information from an open binary file object.

        Raises AACError if no ADIF header or valid ADTS stream is found.
        """

        # skip id3v2 header
        start_offset = 0
        header = fileobj.read(10)
        from mutagen.id3 import BitPaddedInt
        if header.startswith(b"ID3"):
            size = BitPaddedInt(header[6:])
            start_offset = size + 10

        fileobj.seek(start_offset)
        adif = fileobj.read(4)
        if adif == b"ADIF":
            self._parse_adif(fileobj)
            self._type = "ADIF"
        else:
            self._parse_adts(fileobj, start_offset)
            self._type = "ADTS"

    def _parse_adif(self, fileobj):
        """Read the ADIF header located at the current file position."""

        r = BitReader(fileobj)
        try:
            copyright_id_present = r.bits(1)
            if copyright_id_present:
                r.skip(72)  # copyright_id
            r.skip(1 + 1)  # original_copy, home
            bitstream_type = r.bits(1)
            self.bitrate = r.bits(23)
            npce = r.bits(4)
            if bitstream_type == 0:
                r.skip(20)  # adif_buffer_fullness

            pce = ProgramConfigElement(r)
            try:
                self.sample_rate = _FREQS[pce.sampling_frequency_index]
            except IndexError:
                pass
            self.channels = pce.channels

            # other pces..
            for _ in xrange(npce):
                ProgramConfigElement(r)
            r.align()
        except BitReaderError as e:
            raise AACError(e)

        # use bitrate + data size to guess length
        start = fileobj.tell()
        fileobj.seek(0, 2)
        length = fileobj.tell() - start
        if self.bitrate != 0:
            self.length = (8.0 * length) / self.bitrate

    def _parse_adts(self, fileobj, start_offset):
        """Search for an ADTS stream starting at `start_offset`."""

        max_initial_read = 512
        max_resync_read = 10
        max_sync_tries = 10

        frames_max = 100
        frames_needed = 3

        # Try up to X times to find a sync word and read up to Y frames.
        # If more than Z frames are valid we assume a valid stream
        offset = start_offset
        for _ in xrange(max_sync_tries):
            fileobj.seek(offset)
            s = _ADTSStream.find_stream(fileobj, max_initial_read)
            if s is None:
                raise AACError("sync not found")
            # start right after the last found offset
            offset += s.offset + 1

            for _ in xrange(frames_max):
                if not s.parse_frame():
                    break
                if not s.sync(max_resync_read):
                    break

            if s.parsed_frames >= frames_needed:
                break
        else:
            raise AACError(
                "no valid stream found (only %d frames)" % s.parsed_frames)

        self.sample_rate = s.frequency
        self.channels = s.channels
        self.bitrate = s.bitrate

        # size from stream start to end of file
        fileobj.seek(0, 2)
        stream_size = fileobj.tell() - (offset + s.offset)
        # approx; the frequency is 0 for reserved sampling frequency
        # indexes, in which case the length can't be estimated
        self.length = 0.0
        denominator = s.size * s.frequency
        if denominator != 0:
            self.length = float(s.samples * stream_size) / denominator

    def pprint(self):
        """Return a one line, human-readable summary of the stream."""

        return "AAC (%s), %d Hz, %.2f seconds, %d channel(s), %d bps" % (
            self._type, self.sample_rate, self.length, self.channels,
            self.bitrate)
+ """ + + _mimes = ["audio/x-aac"] + + def load(self, filename): + self.filename = filename + with open(filename, "rb") as h: + self.info = AACInfo(h) + + @staticmethod + def score(filename, fileobj, header): + filename = filename.lower() + s = endswith(filename, ".aac") or endswith(filename, ".adts") or \ + endswith(filename, ".adif") + s += b"ADIF" in header + return s + + +Open = AAC +error = AACError + +__all__ = ["AAC", "Open"] diff --git a/lib/mutagen/aiff.py b/lib/mutagen/aiff.py new file mode 100644 index 00000000..1618caa3 --- /dev/null +++ b/lib/mutagen/aiff.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2014 Evan Purkhiser +# 2014 Ben Ockmore +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. + +"""AIFF audio stream information and tags.""" + +# NOTE from Ben Ockmore - according to the Py3k migration guidelines, AIFF +# chunk keys should be unicode in Py3k, and unicode or bytes in Py2k (ASCII). +# To make this easier, chunk keys should be stored internally as unicode. 
class IFFChunk(object):
    """Representation of a single IFF chunk.

    The chunk header is read from the current position of `fileobj` on
    construction. InvalidChunk is raised if no complete header is available
    or the chunk id is not valid.
    """

    # Chunk headers are 8 bytes long (4 for ID and 4 for the size)
    HEADER_SIZE = 8

    def __init__(self, fileobj, parent_chunk=None):
        self.__fileobj = fileobj
        self.parent_chunk = parent_chunk
        self.offset = fileobj.tell()

        header = fileobj.read(self.HEADER_SIZE)
        if len(header) < self.HEADER_SIZE:
            raise InvalidChunk()

        self.id, self.data_size = struct.unpack('>4si', header)

        if not isinstance(self.id, text_type):
            # Arbitrary bytes (e.g. garbage after the last chunk) are not a
            # chunk header; report them the same way as any other invalid
            # chunk instead of leaking a UnicodeDecodeError.
            try:
                self.id = self.id.decode('ascii')
            except UnicodeDecodeError:
                raise InvalidChunk()

        if not is_valid_chunk_id(self.id):
            raise InvalidChunk()

        self.size = self.HEADER_SIZE + self.data_size
        self.data_offset = fileobj.tell()
        self.data = None

    def read(self):
        """Read the chunks data"""
        self.__fileobj.seek(self.data_offset)
        self.data = self.__fileobj.read(self.data_size)

    def delete(self):
        """Removes the chunk from the file"""
        delete_bytes(self.__fileobj, self.size, self.offset)
        if self.parent_chunk is not None:
            # the parent no longer contains this chunk's header and data
            self.parent_chunk.resize(self.parent_chunk.data_size - self.size)

    def resize(self, data_size):
        """Update the size of the chunk"""
        # the size field follows the 4 byte chunk id
        self.__fileobj.seek(self.offset + 4)
        self.__fileobj.write(pack('>I', data_size))
        if self.parent_chunk is not None:
            # propagate the size change up to the enclosing chunk
            size_diff = self.data_size - data_size
            self.parent_chunk.resize(self.parent_chunk.data_size - size_diff)
        self.data_size = data_size
        self.size = data_size + self.HEADER_SIZE
We need to keep track of this + # since the size indicated in the FORM header may not match up with the + # offset determined from the size of the last chunk in the file + self.__next_offset = fileobj.tell() + + # Load all of the chunks + while True: + try: + chunk = IFFChunk(fileobj, self[u'FORM']) + except InvalidChunk: + break + self.__chunks[chunk.id.strip()] = chunk + + # Calculate the location of the next chunk, + # considering the pad byte + self.__next_offset = chunk.offset + chunk.size + self.__next_offset += self.__next_offset % 2 + fileobj.seek(self.__next_offset) + + def __contains__(self, id_): + """Check if the IFF file contains a specific chunk""" + + if not isinstance(id_, text_type): + id_ = id_.decode('ascii') + + if not is_valid_chunk_id(id_): + raise KeyError("AIFF key must be four ASCII characters.") + + return id_ in self.__chunks + + def __getitem__(self, id_): + """Get a chunk from the IFF file""" + + if not isinstance(id_, text_type): + id_ = id_.decode('ascii') + + if not is_valid_chunk_id(id_): + raise KeyError("AIFF key must be four ASCII characters.") + + try: + return self.__chunks[id_] + except KeyError: + raise KeyError( + "%r has no %r chunk" % (self.__fileobj.name, id_)) + + def __delitem__(self, id_): + """Remove a chunk from the IFF file""" + + if not isinstance(id_, text_type): + id_ = id_.decode('ascii') + + if not is_valid_chunk_id(id_): + raise KeyError("AIFF key must be four ASCII characters.") + + self.__chunks.pop(id_).delete() + + def insert_chunk(self, id_): + """Insert a new chunk at the end of the IFF file""" + + if not isinstance(id_, text_type): + id_ = id_.decode('ascii') + + if not is_valid_chunk_id(id_): + raise KeyError("AIFF key must be four ASCII characters.") + + self.__fileobj.seek(self.__next_offset) + self.__fileobj.write(pack('>4si', id_.ljust(4).encode('ascii'), 0)) + self.__fileobj.seek(self.__next_offset) + chunk = IFFChunk(self.__fileobj, self[u'FORM']) + self[u'FORM'].resize(self[u'FORM'].data_size + 
class AIFFInfo(StreamInfo):
    """AIFF audio stream information.

    Information is parsed from the COMM chunk of the AIFF file

    Useful attributes:

    * length -- audio length, in seconds
    * bitrate -- audio bitrate, in bits per second
    * channels -- The number of audio channels
    * sample_rate -- audio sample rate, in Hz
    * sample_size -- The audio sample size
    """

    length = 0
    bitrate = 0
    channels = 0
    sample_rate = 0

    def __init__(self, fileobj):
        """Parse the COMM chunk of the AIFF file in `fileobj`.

        Raises `error` if there is no COMM chunk or it is malformed.
        """

        iff = IFFFile(fileobj)
        try:
            common_chunk = iff[u'COMM']
        except KeyError as e:
            raise error(str(e))

        common_chunk.read()

        # The fields unpacked below take up 18 bytes; a truncated chunk
        # would otherwise surface as a struct.error.
        if len(common_chunk.data) < 18:
            raise error("COMM chunk is too short")

        info = struct.unpack('>hLh10s', common_chunk.data[:18])
        channels, frame_count, sample_size, sample_rate = info

        try:
            self.sample_rate = int(read_float(sample_rate))
        except OverflowError:
            # the 80 bit float does not fit into a Python float
            raise error("Invalid sample rate")
        self.sample_size = sample_size
        self.channels = channels
        self.bitrate = channels * sample_size * self.sample_rate
        # with a sample rate of zero the length is left at its default
        if self.sample_rate != 0:
            self.length = frame_count / float(self.sample_rate)

    def pprint(self):
        """Return a one line, human-readable summary of the stream."""

        return "%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % (
            self.channels, self.bitrate, self.sample_rate, self.length)
class AIFF(FileType):
    """An AIFF audio file.

    :ivar info: :class:`AIFFInfo`
    :ivar tags: :class:`ID3`
    """

    _mimes = ["audio/aiff", "audio/x-aiff"]

    @staticmethod
    def score(filename, fileobj, header):
        """Rate how likely the file is an AIFF file (higher is better)."""

        filename = filename.lower()

        return (header.startswith(b"FORM") * 2 + endswith(filename, b".aif") +
                endswith(filename, b".aiff") + endswith(filename, b".aifc"))

    def add_tags(self):
        """Add an empty ID3 tag to the file."""
        if self.tags is None:
            self.tags = _IFFID3()
        else:
            raise error("an ID3 tag already exists")

    def load(self, filename, **kwargs):
        """Load stream and tag information from a file."""
        self.filename = filename

        try:
            self.tags = _IFFID3(filename, **kwargs)
        except ID3Error:
            self.tags = None

        # A with block keeps a failing open() from being masked by an
        # unbound `fileobj` in a finally clause.
        with open(filename, "rb") as fileobj:
            self.info = AIFFInfo(fileobj)
def is_valid_apev2_key(key):
    """Return True if `key` is usable as an APEv2 item key.

    A valid key is 2 to 255 characters long, consists only of characters
    between space and '~', and is not one of the reserved names.

    On Python 3 anything but str raises TypeError. On Python 2 byte strings
    are accepted as long as they are ASCII.
    """

    if isinstance(key, text_type):
        text = key
    elif PY3:
        raise TypeError("APEv2 key must be str")
    else:
        try:
            text = key.decode('ascii')
        except UnicodeDecodeError:
            return False

    if not 2 <= len(text) <= 255:
        return False

    if min(text) < u' ' or max(text) > u'~':
        return False

    # PY26 - Change to set literal syntax (since set is faster than list here)
    return text not in [u"OggS", u"TAG", u"ID3", u"MP+"]
# "0: Item contains text information coded in UTF-8 @@ -48,12 +63,12 @@ def is_valid_apev2_key(key): # 3: reserved" TEXT, BINARY, EXTERNAL = range(3) -HAS_HEADER = 1L << 31 -HAS_NO_FOOTER = 1L << 30 -IS_HEADER = 1L << 29 +HAS_HEADER = 1 << 31 +HAS_NO_FOOTER = 1 << 30 +IS_HEADER = 1 << 29 -class error(IOError): +class error(IOError, MutagenError): pass @@ -89,9 +104,17 @@ class _APEv2Data(object): def __init__(self, fileobj): self.__find_metadata(fileobj) - self.metadata = max(self.header, self.footer) + + if self.header is None: + self.metadata = self.footer + elif self.footer is None: + self.metadata = self.header + else: + self.metadata = max(self.header, self.footer) + if self.metadata is None: return + self.__fill_missing(fileobj) self.__fix_brokenness(fileobj) if self.data is not None: @@ -107,7 +130,7 @@ class _APEv2Data(object): except IOError: fileobj.seek(0, 2) return - if fileobj.read(8) == "APETAGEX": + if fileobj.read(8) == b"APETAGEX": fileobj.seek(-8, 1) self.footer = self.metadata = fileobj.tell() return @@ -115,10 +138,10 @@ class _APEv2Data(object): # Check for an APEv2 tag followed by an ID3v1 tag at the end. 
try: fileobj.seek(-128, 2) - if fileobj.read(3) == "TAG": + if fileobj.read(3) == b"TAG": fileobj.seek(-35, 1) # "TAG" + header length - if fileobj.read(8) == "APETAGEX": + if fileobj.read(8) == b"APETAGEX": fileobj.seek(-8, 1) self.footer = fileobj.tell() return @@ -127,7 +150,7 @@ class _APEv2Data(object): # (http://www.id3.org/lyrics3200.html) # (header length - "APETAGEX") - "LYRICS200" fileobj.seek(15, 1) - if fileobj.read(9) == 'LYRICS200': + if fileobj.read(9) == b'LYRICS200': fileobj.seek(-15, 1) # "LYRICS200" + size tag try: offset = int(fileobj.read(6)) @@ -135,7 +158,7 @@ class _APEv2Data(object): raise IOError fileobj.seek(-32 - offset - 6, 1) - if fileobj.read(8) == "APETAGEX": + if fileobj.read(8) == b"APETAGEX": fileobj.seek(-8, 1) self.footer = fileobj.tell() return @@ -145,7 +168,7 @@ class _APEv2Data(object): # Check for a tag at the start. fileobj.seek(0, 0) - if fileobj.read(8) == "APETAGEX": + if fileobj.read(8) == b"APETAGEX": self.is_at_start = True self.header = 0 @@ -162,7 +185,7 @@ class _APEv2Data(object): # offset + the size, which includes the footer. self.end = self.data + self.size fileobj.seek(self.end - 32, 0) - if fileobj.read(8) == "APETAGEX": + if fileobj.read(8) == b"APETAGEX": self.footer = self.end - 32 elif self.footer is not None: self.end = self.footer + 32 @@ -194,7 +217,7 @@ class _APEv2Data(object): except IOError: break else: - if fileobj.read(8) == "APETAGEX": + if fileobj.read(8) == b"APETAGEX": fileobj.seek(-8, 1) start = fileobj.tell() else: @@ -202,18 +225,12 @@ class _APEv2Data(object): self.start = start -class APEv2(DictMixin, Metadata): - """A file with an APEv2 tag. - - ID3v1 tags are silently ignored and overwritten. 
class _CIDictProxy(DictMixin):
    """Dict-like container with case-insensitive keys.

    Values are stored under the lowercased key. The spelling a key was last
    set with is remembered and is what keys() reports.
    """

    def __init__(self, *args, **kwargs):
        # lowercased key -> key as it was passed to __setitem__
        self.__casemap = {}
        # lowercased key -> value
        self.__dict = {}
        super(_CIDictProxy, self).__init__(*args, **kwargs)
        # Internally all names are stored as lowercase, but the case
        # they were set with is remembered and used when saving.

    def __getitem__(self, key):
        folded = key.lower()
        return self.__dict[folded]

    def __setitem__(self, key, value):
        folded = key.lower()
        self.__casemap[folded] = key
        self.__dict[folded] = value

    def __delitem__(self, key):
        folded = key.lower()
        del self.__casemap[folded]
        del self.__dict[folded]

    def keys(self):
        original_case = self.__casemap
        return [original_case.get(folded, folded)
                for folded in self.__dict.keys()]
+ """ + + filename = None + def pprint(self): """Return tag key=value pairs in a human-readable format.""" - items = self.items() - items.sort() - return "\n".join(["%s=%s" % (k, v.pprint()) for k, v in items]) + + items = sorted(self.items()) + return u"\n".join(u"%s=%s" % (k, v.pprint()) for k, v in items) def load(self, filename): """Load tags from a filename.""" + self.filename = filename fileobj = open(filename, "rb") try: @@ -237,15 +280,14 @@ class APEv2(DictMixin, Metadata): fileobj.close() if data.tag: self.clear() - self.__casemap.clear() self.__parse_tag(data.tag, data.items) else: raise APENoHeaderError("No APE tag found") def __parse_tag(self, tag, count): - fileobj = StringIO(tag) + fileobj = cBytesIO(tag) - for i in range(count): + for i in xrange(count): size_data = fileobj.read(4) # someone writes wrong item counts if not size_data: @@ -259,25 +301,37 @@ class APEv2(DictMixin, Metadata): if kind == 3: raise APEBadItemError("value type must be 0, 1, or 2") key = value = fileobj.read(1) - while key[-1:] != '\x00' and value: + while key[-1:] != b'\x00' and value: value = fileobj.read(1) key += value - if key[-1:] == "\x00": + if key[-1:] == b"\x00": key = key[:-1] + if PY3: + try: + key = key.decode("ascii") + except UnicodeError as err: + reraise(APEBadItemError, err, sys.exc_info()[2]) value = fileobj.read(size) - self[key] = APEValue(value, kind) + + value = _get_value_type(kind)._new(value) + + self[key] = value def __getitem__(self, key): if not is_valid_apev2_key(key): raise KeyError("%r is not a valid APEv2 key" % key) - key = key.encode('ascii') - return self.__dict[key.lower()] + if PY2: + key = key.encode('ascii') + + return super(APEv2, self).__getitem__(key) def __delitem__(self, key): if not is_valid_apev2_key(key): raise KeyError("%r is not a valid APEv2 key" % key) - key = key.encode('ascii') - del(self.__dict[key.lower()]) + if PY2: + key = key.encode('ascii') + + super(APEv2, self).__delitem__(key) def __setitem__(self, key, value): 
"""'Magic' value setter. @@ -288,6 +342,9 @@ class APEv2(DictMixin, Metadata): as a list of string/Unicode values. If you pass in a string that is not valid UTF-8, it assumes it is a binary value. + Python 3: all bytes will be assumed to be a byte value, even + if they are valid utf-8. + If you need to force a specific type of value (e.g. binary data that also happens to be valid UTF-8, or an external reference), use the APEValue factory and set the value to the @@ -299,30 +356,40 @@ class APEv2(DictMixin, Metadata): if not is_valid_apev2_key(key): raise KeyError("%r is not a valid APEv2 key" % key) - key = key.encode('ascii') + + if PY2: + key = key.encode('ascii') if not isinstance(value, _APEValue): # let's guess at the content if we're not already a value... - if isinstance(value, unicode): + if isinstance(value, text_type): # unicode? we've got to be text. - value = APEValue(utf8(value), TEXT) + value = APEValue(value, TEXT) elif isinstance(value, list): + items = [] + for v in value: + if not isinstance(v, text_type): + if PY3: + raise TypeError("item in list not str") + v = v.decode("utf-8") + items.append(v) + # list? text. - value = APEValue("\0".join(map(utf8, value)), TEXT) + value = APEValue(u"\0".join(items), TEXT) else: - try: - value.decode("utf-8") - except UnicodeError: - # invalid UTF8 text, probably binary + if PY3: value = APEValue(value, BINARY) else: - # valid UTF8, probably text - value = APEValue(value, TEXT) - self.__casemap[key.lower()] = key - self.__dict[key.lower()] = value + try: + value.decode("utf-8") + except UnicodeError: + # invalid UTF8 text, probably binary + value = APEValue(value, BINARY) + else: + # valid UTF8, probably text + value = APEValue(value, TEXT) - def keys(self): - return [self.__casemap.get(key, key) for key in self.__dict.keys()] + super(APEv2, self).__setitem__(key, value) def save(self, filename=None): """Save changes to a file. 
@@ -348,33 +415,49 @@ class APEv2(DictMixin, Metadata): fileobj.truncate() fileobj.seek(0, 2) + tags = [] + for key, value in self.items(): + # Packed format for an item: + # 4B: Value length + # 4B: Value type + # Key name + # 1B: Null + # Key value + value_data = value._write() + if not isinstance(key, bytes): + key = key.encode("utf-8") + tag_data = bytearray() + tag_data += struct.pack("<2I", len(value_data), value.kind << 1) + tag_data += key + b"\0" + value_data + tags.append(bytes(tag_data)) + # "APE tags items should be sorted ascending by size... This is # not a MUST, but STRONGLY recommended. Actually the items should # be sorted by importance/byte, but this is not feasible." - tags = [v._internal(k) for k, v in self.items()] - tags.sort(lambda a, b: cmp(len(a), len(b))) + tags.sort(key=len) num_tags = len(tags) - tags = "".join(tags) + tags = b"".join(tags) - header = "APETAGEX%s%s" % ( - # version, tag size, item count, flags - struct.pack("<4I", 2000, len(tags) + 32, num_tags, - HAS_HEADER | IS_HEADER), - "\0" * 8) + header = bytearray(b"APETAGEX") + # version, tag size, item count, flags + header += struct.pack("<4I", 2000, len(tags) + 32, num_tags, + HAS_HEADER | IS_HEADER) + header += b"\0" * 8 fileobj.write(header) fileobj.write(tags) - footer = "APETAGEX%s%s" % ( - # version, tag size, item count, flags - struct.pack("<4I", 2000, len(tags) + 32, num_tags, - HAS_HEADER), - "\0" * 8) + footer = bytearray(b"APETAGEX") + footer += struct.pack("<4I", 2000, len(tags) + 32, num_tags, + HAS_HEADER) + footer += b"\0" * 8 + fileobj.write(footer) fileobj.close() def delete(self, filename=None): """Remove tags from a file.""" + filename = filename or self.filename fileobj = open(filename, "r+b") try: @@ -391,105 +474,212 @@ Open = APEv2 def delete(filename): """Remove tags from a file.""" + try: APEv2(filename).delete() except APENoHeaderError: pass +def _get_value_type(kind): + """Returns a _APEValue subclass or raises ValueError""" + + if kind == TEXT: + 
return APETextValue + elif kind == BINARY: + return APEBinaryValue + elif kind == EXTERNAL: + return APEExtValue + raise ValueError("unknown kind %r" % kind) + + def APEValue(value, kind): """APEv2 tag value factory. Use this if you need to specify the value's type manually. Binary and text data are automatically detected by APEv2.__setitem__. """ - if kind == TEXT: - return APETextValue(value, kind) - elif kind == BINARY: - return APEBinaryValue(value, kind) - elif kind == EXTERNAL: - return APEExtValue(value, kind) - else: + + try: + type_ = _get_value_type(kind) + except ValueError: raise ValueError("kind must be TEXT, BINARY, or EXTERNAL") + else: + return type_(value) class _APEValue(object): - def __init__(self, value, kind): - self.kind = kind - self.value = value - def __len__(self): - return len(self.value) + kind = None + value = None - def __str__(self): - return self.value + def __init__(self, value, kind=None): + # kind kwarg is for backwards compat + if kind is not None and kind != self.kind: + raise ValueError + self.value = self._validate(value) - # Packed format for an item: - # 4B: Value length - # 4B: Value type - # Key name - # 1B: Null - # Key value - def _internal(self, key): - return "%s%s\0%s" % ( - struct.pack("<2I", len(self.value), self.kind << 1), - key, self.value) + @classmethod + def _new(cls, data): + instance = cls.__new__(cls) + instance._parse(data) + return instance + + def _parse(self, data): + """Sets value or raises APEBadItemError""" + + raise NotImplementedError + + def _write(self): + """Returns bytes""" + + raise NotImplementedError + + def _validate(self, value): + """Returns validated value or raises TypeError/ValueErrr""" + + raise NotImplementedError def __repr__(self): return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind) -class APETextValue(_APEValue): +@swap_to_string +@total_ordering +class _APEUtf8Value(_APEValue): + + def _parse(self, data): + try: + self.value = data.decode("utf-8") + except 
UnicodeDecodeError as e: + reraise(APEBadItemError, e, sys.exc_info()[2]) + + def _validate(self, value): + if not isinstance(value, text_type): + if PY3: + raise TypeError("value not str") + else: + value = value.decode("utf-8") + return value + + def _write(self): + return self.value.encode("utf-8") + + def __len__(self): + return len(self.value) + + def __bytes__(self): + return self._write() + + def __eq__(self, other): + return self.value == other + + def __lt__(self, other): + return self.value < other + + def __str__(self): + return self.value + + +class APETextValue(_APEUtf8Value, MutableSequence): """An APEv2 text value. Text values are Unicode/UTF-8 strings. They can be accessed like - strings (with a null seperating the values), or arrays of strings.""" + strings (with a null separating the values), or arrays of strings. + """ - def __unicode__(self): - return unicode(str(self), "utf-8") + kind = TEXT def __iter__(self): """Iterate over the strings of the value (not the characters)""" - return iter(unicode(self).split("\0")) + + return iter(self.value.split(u"\0")) def __getitem__(self, index): - return unicode(self).split("\0")[index] + return self.value.split(u"\0")[index] def __len__(self): - return self.value.count("\0") + 1 - - def __cmp__(self, other): - return cmp(unicode(self), other) - - __hash__ = _APEValue.__hash__ + return self.value.count(u"\0") + 1 def __setitem__(self, index, value): + if not isinstance(value, text_type): + if PY3: + raise TypeError("value not str") + else: + value = value.decode("utf-8") + values = list(self) - values[index] = value.encode("utf-8") - self.value = "\0".join(values).encode("utf-8") + values[index] = value + self.value = u"\0".join(values) + + def insert(self, index, value): + if not isinstance(value, text_type): + if PY3: + raise TypeError("value not str") + else: + value = value.decode("utf-8") + + values = list(self) + values.insert(index, value) + self.value = u"\0".join(values) + + def __delitem__(self, 
index): + values = list(self) + del values[index] + self.value = u"\0".join(values) def pprint(self): - return " / ".join(self) + return u" / ".join(self) +@swap_to_string +@total_ordering class APEBinaryValue(_APEValue): """An APEv2 binary value.""" + kind = BINARY + + def _parse(self, data): + self.value = data + + def _write(self): + return self.value + + def _validate(self, value): + if not isinstance(value, bytes): + raise TypeError("value not bytes") + return bytes(value) + + def __len__(self): + return len(self.value) + + def __bytes__(self): + return self._write() + + def __eq__(self, other): + return self.value == other + + def __lt__(self, other): + return self.value < other + def pprint(self): - return "[%d bytes]" % len(self) + return u"[%d bytes]" % len(self) -class APEExtValue(_APEValue): +class APEExtValue(_APEUtf8Value): """An APEv2 external value. External values are usually URI or IRI strings. """ + + kind = EXTERNAL + def pprint(self): - return "[External] %s" % unicode(self) + return u"[External] %s" % self.value class APEv2File(FileType): - class _Info(object): + class _Info(StreamInfo): length = 0 bitrate = 0 @@ -498,14 +688,14 @@ class APEv2File(FileType): @staticmethod def pprint(): - return "Unknown format with APEv2 tag." + return u"Unknown format with APEv2 tag." 
def load(self, filename): self.filename = filename self.info = self._Info(open(filename, "rb")) try: self.tags = APEv2(filename) - except error: + except APENoHeaderError: self.tags = None def add_tags(self): @@ -521,5 +711,4 @@ class APEv2File(FileType): except IOError: fileobj.seek(0) footer = fileobj.read() - filename = filename.lower() - return (("APETAGEX" in footer) - header.startswith("ID3")) + return ((b"APETAGEX" in footer) - header.startswith(b"ID3")) diff --git a/lib/mutagen/asf.py b/lib/mutagen/asf.py index fab5559b..7cb7910d 100644 --- a/lib/mutagen/asf.py +++ b/lib/mutagen/asf.py @@ -1,5 +1,8 @@ -# Copyright 2006-2007 Lukas Lalinsky -# Copyright 2005-2006 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2005-2006 Joe Wreschnig +# Copyright (C) 2006-2007 Lukas Lalinsky + # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,12 +12,16 @@ __all__ = ["ASF", "Open"] +import sys import struct -from mutagen import FileType, Metadata -from mutagen._util import insert_bytes, delete_bytes, DictMixin +from mutagen import FileType, Metadata, StreamInfo +from mutagen._util import (insert_bytes, delete_bytes, DictMixin, + total_ordering, MutagenError) +from ._compat import swap_to_string, text_type, PY2, string_types, reraise, \ + xrange, long_, PY3 -class error(IOError): +class error(IOError, MutagenError): pass @@ -26,7 +33,7 @@ class ASFHeaderError(error): pass -class ASFInfo(object): +class ASFInfo(StreamInfo): """ASF stream information.""" def __init__(self): @@ -45,7 +52,7 @@ class ASFTags(list, DictMixin, Metadata): """Dictionary containing ASF attributes.""" def pprint(self): - return "\n".join(["%s=%s" % (k, v) for k, v in self]) + return "\n".join("%s=%s" % (k, v) for k, v in self) def __getitem__(self, key): """A list of values for the key. @@ -54,6 +61,11 @@ class ASFTags(list, DictMixin, Metadata): work. 
""" + + # PY3 only + if isinstance(key, slice): + return list.__getitem__(self, key) + values = [value for (k, value) in self if k == key] if not values: raise KeyError(key) @@ -62,11 +74,17 @@ class ASFTags(list, DictMixin, Metadata): def __delitem__(self, key): """Delete all values associated with the key.""" - to_delete = filter(lambda x: x[0] == key, self) + + # PY3 only + if isinstance(key, slice): + return list.__delitem__(self, key) + + to_delete = [x for x in self if x[0] == key] if not to_delete: raise KeyError(key) else: - map(self.remove, to_delete) + for k in to_delete: + self.remove(k) def __contains__(self, key): """Return true if the key has any values.""" @@ -84,29 +102,41 @@ class ASFTags(list, DictMixin, Metadata): string. """ + + # PY3 only + if isinstance(key, slice): + return list.__setitem__(self, key, values) + if not isinstance(values, list): values = [values] - try: - del(self[key]) - except KeyError: - pass + + to_append = [] for value in values: - if key in _standard_attribute_names: - value = unicode(value) - elif not isinstance(value, ASFBaseAttribute): - if isinstance(value, basestring): + if not isinstance(value, ASFBaseAttribute): + if isinstance(value, string_types): value = ASFUnicodeAttribute(value) + elif PY3 and isinstance(value, bytes): + value = ASFByteArrayAttribute(value) elif isinstance(value, bool): value = ASFBoolAttribute(value) elif isinstance(value, int): value = ASFDWordAttribute(value) - elif isinstance(value, long): + elif isinstance(value, long_): value = ASFQWordAttribute(value) - self.append((key, value)) + else: + raise TypeError("Invalid type %r" % type(value)) + to_append.append((key, value)) + + try: + del(self[key]) + except KeyError: + pass + + self.extend(to_append) def keys(self): """Return all keys in the comment.""" - return self and set(zip(*self)[0]) + return self and set(next(iter(zip(*self)))) def as_dict(self): """Return a copy of the comment data in a real dict.""" @@ -127,7 +157,19 @@ class 
ASFBaseAttribute(object): if data: self.value = self.parse(data, **kwargs) else: - self.value = value + if value is None: + # we used to support not passing any args and instead assign + # them later, keep that working.. + self.value = None + else: + self.value = self._validate(value) + + def _validate(self, value): + """Raises TypeError or ValueError in case the user supplied value + isn't valid. + """ + + return value def data_size(self): raise NotImplementedError @@ -142,13 +184,13 @@ class ASFBaseAttribute(object): return name def render(self, name): - name = name.encode("utf-16-le") + "\x00\x00" + name = name.encode("utf-16-le") + b"\x00\x00" data = self._render() return (struct.pack(" 0: - texts.append(data[pos:end].decode("utf-16-le").strip("\x00")) + texts.append(data[pos:end].decode("utf-16-le").strip(u"\x00")) else: texts.append(None) pos = end - title, author, copyright, desc, rating = texts - for key, value in dict( - Title=title, - Author=author, - Copyright=copyright, - Description=desc, - Rating=rating - ).items(): + + for key, value in zip(self.NAMES, texts): if value is not None: - asf.tags[key] = value + value = ASFUnicodeAttribute(value=value) + asf._tags.setdefault(self.GUID, []).append((key, value)) def render(self, asf): def render_text(name): - value = asf.tags.get(name, []) - if value: - return value[0].encode("utf-16-le") + "\x00\x00" + value = asf.to_content_description.get(name) + if value is not None: + return text_type(value).encode("utf-16-le") + b"\x00\x00" else: - return "" - texts = map(render_text, _standard_attribute_names) - data = struct.pack(" 0xFFFF or value.TYPE == GUID) - if (value.language is None and value.stream is None and - name not in self.to_extended_content_description and - not library_only): - self.to_extended_content_description[name] = value - elif (value.language is None and value.stream is not None and - name not in self.to_metadata and not library_only): - self.to_metadata[name] = value - else: + can_cont_desc 
= value.TYPE == UNICODE + + if library_only or value.language is not None: self.to_metadata_library.append((name, value)) + elif value.stream is not None: + if name not in self.to_metadata: + self.to_metadata[name] = value + else: + self.to_metadata_library.append((name, value)) + elif name in ContentDescriptionObject.NAMES: + if name not in self.to_content_description and can_cont_desc: + self.to_content_description[name] = value + else: + self.to_metadata_library.append((name, value)) + else: + if name not in self.to_extended_content_description: + self.to_extended_content_description[name] = value + else: + self.to_metadata_library.append((name, value)) # Add missing objects if not self.content_description_obj: @@ -651,13 +805,12 @@ class ASF(FileType): self.header_extension_obj.objects.append(self.metadata_library_obj) # Render the header - data = "".join([obj.render(self) for obj in self.objects]) + data = b"".join([obj.render(self) for obj in self.objects]) data = (HeaderObject.GUID + struct.pack(" self.size: insert_bytes(fileobj, size - self.size, self.size) @@ -665,8 +818,6 @@ class ASF(FileType): delete_bytes(fileobj, self.size - size, 0) fileobj.seek(0) fileobj.write(data) - finally: - fileobj.close() self.size = size self.num_objects = len(self.objects) @@ -684,9 +835,16 @@ class ASF(FileType): self.size, self.num_objects = struct.unpack(" u'\x7f': enc = 3 + break + id3.add(mutagen.id3.TXXX(encoding=enc, text=value, desc=desc)) else: frame.text = value @@ -171,8 +175,10 @@ class EasyID3(DictMixin, Metadata): load = property(lambda s: s.__id3.load, lambda s, v: setattr(s.__id3, 'load', v)) - save = property(lambda s: s.__id3.save, - lambda s, v: setattr(s.__id3, 'save', v)) + def save(self, *args, **kwargs): + # ignore v2_version until we support 2.3 here + kwargs.pop("v2_version", None) + self.__id3.save(*args, **kwargs) delete = property(lambda s: s.__id3.delete, lambda s, v: setattr(s.__id3, 'delete', v)) @@ -193,8 +199,12 @@ class EasyID3(DictMixin, 
Metadata): def __setitem__(self, key, value): key = key.lower() - if isinstance(value, basestring): - value = [value] + if PY2: + if isinstance(value, basestring): + value = [value] + else: + if isinstance(value, text_type): + value = [value] func = dict_match(self.Set, key, self.SetFallback) if func is not None: return func(self.__id3, key, value) @@ -263,6 +273,18 @@ def date_delete(id3, key): del(id3["TDRC"]) +def original_date_get(id3, key): + return [stamp.text for stamp in id3["TDOR"].text] + + +def original_date_set(id3, key, value): + id3.add(mutagen.id3.TDOR(encoding=3, text=value)) + + +def original_date_delete(id3, key): + del(id3["TDOR"]) + + def performer_get(id3, key): people = [] wanted_role = key.split(":", 1)[1] @@ -433,7 +455,7 @@ def peakgain_list(id3, key): keys.append("replaygain_%s_peak" % frame.desc) return keys -for frameid, key in { +for frameid, key in iteritems({ "TALB": "album", "TBPM": "bpm", "TCMP": "compilation", # iTunes extension @@ -461,18 +483,20 @@ for frameid, key in { "TSOT": "titlesort", "TSRC": "isrc", "TSST": "discsubtitle", -}.iteritems(): + "TLAN": "language", +}): EasyID3.RegisterTextKey(key, frameid) EasyID3.RegisterKey("genre", genre_get, genre_set, genre_delete) EasyID3.RegisterKey("date", date_get, date_set, date_delete) +EasyID3.RegisterKey("originaldate", original_date_get, original_date_set, + original_date_delete) EasyID3.RegisterKey( "performer:*", performer_get, performer_set, performer_delete, performer_list) EasyID3.RegisterKey("musicbrainz_trackid", musicbrainz_trackid_get, musicbrainz_trackid_set, musicbrainz_trackid_delete) EasyID3.RegisterKey("website", website_get, website_set, website_delete) -EasyID3.RegisterKey("website", website_get, website_set, website_delete) EasyID3.RegisterKey( "replaygain_*_gain", gain_get, gain_set, gain_delete, peakgain_list) EasyID3.RegisterKey("replaygain_*_peak", peak_get, peak_set, peak_delete) @@ -481,7 +505,7 @@ EasyID3.RegisterKey("replaygain_*_peak", peak_get, 
peak_set, peak_delete) # http://musicbrainz.org/docs/specs/metadata_tags.html # http://bugs.musicbrainz.org/ticket/1383 # http://musicbrainz.org/doc/MusicBrainzTag -for desc, key in { +for desc, key in iteritems({ u"MusicBrainz Artist Id": "musicbrainz_artistid", u"MusicBrainz Album Id": "musicbrainz_albumid", u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid", @@ -495,7 +519,13 @@ for desc, key in { u"ASIN": "asin", u"ALBUMARTISTSORT": "albumartistsort", u"BARCODE": "barcode", -}.iteritems(): + u"CATALOGNUMBER": "catalognumber", + u"MusicBrainz Release Track Id": "musicbrainz_releasetrackid", + u"MusicBrainz Release Group Id": "musicbrainz_releasegroupid", + u"MusicBrainz Work Id": "musicbrainz_workid", + u"Acoustid Fingerprint": "acoustid_fingerprint", + u"Acoustid Id": "acoustid_id", +}): EasyID3.RegisterTXXXKey(key, desc) diff --git a/lib/mutagen/easymp4.py b/lib/mutagen/easymp4.py index 65e78b74..627c0e28 100644 --- a/lib/mutagen/easymp4.py +++ b/lib/mutagen/easymp4.py @@ -1,12 +1,16 @@ -# Copyright 2009 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2009 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. from mutagen import Metadata -from mutagen._util import DictMixin, dict_match, utf8 +from mutagen._util import DictMixin, dict_match from mutagen.mp4 import MP4, MP4Tags, error, delete +from ._compat import PY2, text_type, PY3 + __all__ = ["EasyMP4Tags", "EasyMP4", "delete", "error"] @@ -91,16 +95,16 @@ class EasyMP4Tags(DictMixin, Metadata): cls.RegisterKey(key, getter, setter, deleter) @classmethod - def RegisterIntKey(cls, key, atomid, min_value=0, max_value=2**16-1): + def RegisterIntKey(cls, key, atomid, min_value=0, max_value=(2 ** 16) - 1): """Register a scalar integer key. 
""" def getter(tags, key): - return map(unicode, tags[atomid]) + return list(map(text_type, tags[atomid])) def setter(tags, key, value): clamp = lambda x: int(min(max(min_value, x), max_value)) - tags[atomid] = map(clamp, map(int, value)) + tags[atomid] = [clamp(v) for v in map(int, value)] def deleter(tags, key): del(tags[atomid]) @@ -108,14 +112,15 @@ class EasyMP4Tags(DictMixin, Metadata): cls.RegisterKey(key, getter, setter, deleter) @classmethod - def RegisterIntPairKey(cls, key, atomid, min_value=0, max_value=2**16-1): + def RegisterIntPairKey(cls, key, atomid, min_value=0, + max_value=(2 ** 16) - 1): def getter(tags, key): ret = [] for (track, total) in tags[atomid]: if total: ret.append(u"%d/%d" % (track, total)) else: - ret.append(unicode(track)) + ret.append(text_type(track)) return ret def setter(tags, key, value): @@ -148,13 +153,20 @@ class EasyMP4Tags(DictMixin, Metadata): EasyMP4Tags.RegisterFreeformKey( "musicbrainz_artistid", "MusicBrainz Artist Id") """ - atomid = "----:%s:%s" % (mean, name) + atomid = "----:" + mean + ":" + name def getter(tags, key): return [s.decode("utf-8", "replace") for s in tags[atomid]] def setter(tags, key, value): - tags[atomid] = map(utf8, value) + encoded = [] + for v in value: + if not isinstance(v, text_type): + if PY3: + raise TypeError("%r not str" % v) + v = v.decode("utf-8") + encoded.append(v.encode("utf-8")) + tags[atomid] = encoded def deleter(tags, key): del(tags[atomid]) @@ -171,8 +183,14 @@ class EasyMP4Tags(DictMixin, Metadata): def __setitem__(self, key, value): key = key.lower() - if isinstance(value, basestring): - value = [value] + + if PY2: + if isinstance(value, basestring): + value = [value] + else: + if isinstance(value, text_type): + value = [value] + func = dict_match(self.Set, key) if func is not None: return func(self.__mp4, key, value) diff --git a/lib/mutagen/flac.py b/lib/mutagen/flac.py index f8e014bc..50a8f9d5 100644 --- a/lib/mutagen/flac.py +++ b/lib/mutagen/flac.py @@ -1,5 +1,6 @@ -# 
FLAC comment support for Mutagen -# Copyright 2005 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2005 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as @@ -22,17 +23,16 @@ http://flac.sourceforge.net/format.html __all__ = ["FLAC", "Open", "delete"] import struct -from cStringIO import StringIO -from _vorbis import VCommentDict -from mutagen import FileType -from mutagen._util import insert_bytes +from ._vorbis import VCommentDict +import mutagen + +from ._compat import cBytesIO, endswith, chr_ +from mutagen._util import insert_bytes, MutagenError from mutagen.id3 import BitPaddedInt -import sys -if sys.version_info >= (2, 6): - from functools import reduce +from functools import reduce -class error(IOError): +class error(IOError, MutagenError): pass @@ -44,10 +44,10 @@ class FLACVorbisError(ValueError, error): pass -def to_int_be(string): +def to_int_be(data): """Convert an arbitrarily-long string to a long using big-endian byte order.""" - return reduce(lambda a, b: (a << 8) + ord(b), string, 0L) + return reduce(lambda a, b: (a << 8) + b, bytearray(data), 0) class StrictFileObject(object): @@ -89,8 +89,8 @@ class MetadataBlock(object): The metadata header should not be included.""" if data is not None: if not isinstance(data, StrictFileObject): - if isinstance(data, str): - data = StringIO(data) + if isinstance(data, bytes): + data = cBytesIO(data) elif not hasattr(data, 'read'): raise TypeError( "StreamInfo requires string data or a file-like") @@ -110,30 +110,33 @@ class MetadataBlock(object): codes = [[block.code, block.write()] for block in blocks] codes[-1][0] |= 128 for code, datum in codes: - byte = chr(code) - if len(datum) > 2**24: + byte = chr_(code) + if len(datum) > 2 ** 24: raise error("block is too long to write") length = struct.pack(">I", len(datum))[-3:] data.append(byte + length + datum) - return "".join(data) + return 
b"".join(data) @staticmethod def group_padding(blocks): """Consolidate FLAC padding metadata blocks. The overall size of the rendered blocks does not change, so - this adds several bytes of padding for each merged block.""" - paddings = filter(lambda x: isinstance(x, Padding), blocks) - map(blocks.remove, paddings) + this adds several bytes of padding for each merged block. + """ + + paddings = [b for b in blocks if isinstance(b, Padding)] + for p in paddings: + blocks.remove(p) # total padding size is the sum of padding sizes plus 4 bytes # per removed header. - size = sum([padding.length for padding in paddings]) + size = sum(padding.length for padding in paddings) padding = Padding() padding.length = size + 4 * (len(paddings) - 1) blocks.append(padding) -class StreamInfo(MetadataBlock): +class StreamInfo(MetadataBlock, mutagen.StreamInfo): """FLAC stream information. This contains information about the audio data in the FLAC file. @@ -188,13 +191,13 @@ class StreamInfo(MetadataBlock): bps_tail = bps_total >> 36 bps_head = (sample_channels_bps & 1) << 4 self.bits_per_sample = int(bps_head + bps_tail + 1) - self.total_samples = bps_total & 0xFFFFFFFFFL + self.total_samples = bps_total & 0xFFFFFFFFF self.length = self.total_samples / float(self.sample_rate) self.md5_signature = to_int_be(data.read(16)) def write(self): - f = StringIO() + f = cBytesIO() f.write(struct.pack(">I", self.min_blocksize)[-2:]) f.write(struct.pack(">I", self.max_blocksize)[-2:]) f.write(struct.pack(">I", self.min_framesize)[-3:]) @@ -206,18 +209,18 @@ class StreamInfo(MetadataBlock): byte = (self.sample_rate & 0xF) << 4 byte += ((self.channels - 1) & 7) << 1 byte += ((self.bits_per_sample - 1) >> 4) & 1 - f.write(chr(byte)) + f.write(chr_(byte)) # 4 bits of bps, 4 of sample count byte = ((self.bits_per_sample - 1) & 0xF) << 4 byte += (self.total_samples >> 32) & 0xF - f.write(chr(byte)) + f.write(chr_(byte)) # last 32 of sample count - f.write(struct.pack(">I", self.total_samples & 
0xFFFFFFFFL)) + f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFF)) # MD5 signature sig = self.md5_signature f.write(struct.pack( - ">4I", (sig >> 96) & 0xFFFFFFFFL, (sig >> 64) & 0xFFFFFFFFL, - (sig >> 32) & 0xFFFFFFFFL, sig & 0xFFFFFFFFL)) + ">4I", (sig >> 96) & 0xFFFFFFFF, (sig >> 64) & 0xFFFFFFFF, + (sig >> 32) & 0xFFFFFFFF, sig & 0xFFFFFFFF)) return f.getvalue() def pprint(self): @@ -284,7 +287,7 @@ class SeekTable(MetadataBlock): sp = data.tryread(self.__SEEKPOINT_SIZE) def write(self): - f = StringIO() + f = cBytesIO() for seekpoint in self.seekpoints: packed = struct.pack( self.__SEEKPOINT_FORMAT, @@ -378,10 +381,10 @@ class CueSheetTrack(object): __hash__ = object.__hash__ def __repr__(self): - return ("<%s number=%r, offset=%d, isrc=%r, type=%r, " - "pre_emphasis=%r, indexes=%r)>") % ( - type(self).__name__, self.track_number, self.start_offset, - self.isrc, self.type, self.pre_emphasis, self.indexes) + return (("<%s number=%r, offset=%d, isrc=%r, type=%r, " + "pre_emphasis=%r, indexes=%r)>") % + (type(self).__name__, self.track_number, self.start_offset, + self.isrc, self.type, self.pre_emphasis, self.indexes)) class CueSheet(MetadataBlock): @@ -409,7 +412,7 @@ class CueSheet(MetadataBlock): code = 5 - media_catalog_number = '' + media_catalog_number = b'' lead_in_samples = 88200 compact_disc = True @@ -432,7 +435,7 @@ class CueSheet(MetadataBlock): header = data.read(self.__CUESHEET_SIZE) media_catalog_number, lead_in_samples, flags, num_tracks = \ struct.unpack(self.__CUESHEET_FORMAT, header) - self.media_catalog_number = media_catalog_number.rstrip('\0') + self.media_catalog_number = media_catalog_number.rstrip(b'\0') self.lead_in_samples = lead_in_samples self.compact_disc = bool(flags & 0x80) self.tracks = [] @@ -440,7 +443,7 @@ class CueSheet(MetadataBlock): track = data.read(self.__CUESHEET_TRACK_SIZE) start_offset, track_number, isrc_padded, flags, num_indexes = \ struct.unpack(self.__CUESHEET_TRACK_FORMAT, track) - isrc = 
isrc_padded.rstrip('\0') + isrc = isrc_padded.rstrip(b'\0') type_ = (flags & 0x80) >> 7 pre_emphasis = bool(flags & 0x40) val = CueSheetTrack( @@ -454,7 +457,7 @@ class CueSheet(MetadataBlock): self.tracks.append(val) def write(self): - f = StringIO() + f = cBytesIO() flags = 0 if self.compact_disc: flags |= 0x80 @@ -480,10 +483,10 @@ class CueSheet(MetadataBlock): return f.getvalue() def __repr__(self): - return ("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, " - "tracks=%r>") % ( - type(self).__name__, self.media_catalog_number, - self.lead_in_samples, self.compact_disc, self.tracks) + return (("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, " + "tracks=%r>") % + (type(self).__name__, self.media_catalog_number, + self.lead_in_samples, self.compact_disc, self.tracks)) class Picture(MetadataBlock): @@ -513,7 +516,7 @@ class Picture(MetadataBlock): self.height = 0 self.depth = 0 self.colors = 0 - self.data = '' + self.data = b'' super(Picture, self).__init__(data) def __eq__(self, other): @@ -541,7 +544,7 @@ class Picture(MetadataBlock): self.data = data.read(length) def write(self): - f = StringIO() + f = cBytesIO() mime = self.mime.encode('UTF-8') f.write(struct.pack('>2I', self.type, len(mime))) f.write(mime) @@ -569,7 +572,7 @@ class Padding(MetadataBlock): code = 1 - def __init__(self, data=""): + def __init__(self, data=b""): super(Padding, self).__init__(data) def load(self, data): @@ -577,7 +580,7 @@ class Padding(MetadataBlock): def write(self): try: - return "\x00" * self.length + return b"\x00" * self.length # On some 64 bit platforms this won't generate a MemoryError # or OverflowError since you might have enough RAM, but it # still generates a ValueError. On other 64 bit platforms, @@ -596,7 +599,7 @@ class Padding(MetadataBlock): return "<%s (%d bytes)>" % (type(self).__name__, self.length) -class FLAC(FileType): +class FLAC(mutagen.FileType): """A FLAC audio file. 
Attributes: @@ -615,9 +618,9 @@ class FLAC(FileType): """Known metadata block types, indexed by ID.""" @staticmethod - def score(filename, fileobj, header): - return (header.startswith("fLaC") + - filename.lower().endswith(".flac") * 3) + def score(filename, fileobj, header_data): + return (header_data.startswith(b"fLaC") + + endswith(filename.lower(), ".flac") * 3) def __read_metadata_block(self, fileobj): byte = ord(fileobj.read(1)) @@ -721,13 +724,15 @@ class FLAC(FileType): def clear_pictures(self): """Delete all pictures from the file.""" - self.metadata_blocks = filter(lambda b: b.code != Picture.code, - self.metadata_blocks) + + blocks = [b for b in self.metadata_blocks if b.code != Picture.code] + self.metadata_blocks = blocks @property def pictures(self): """List of embedded pictures""" - return filter(lambda b: b.code == Picture.code, self.metadata_blocks) + + return [b for b in self.metadata_blocks if b.code == Picture.code] def save(self, filename=None, deleteid3=False): """Save metadata blocks to a file. @@ -742,7 +747,7 @@ class FLAC(FileType): try: # Ensure we've got padding at the end, and only at the end. # If adding makes it too large, we'll scale it down later. 
- self.metadata_blocks.append(Padding('\x00' * 1020)) + self.metadata_blocks.append(Padding(b'\x00' * 1020)) MetadataBlock.group_padding(self.metadata_blocks) header = self.__check_header(f) @@ -776,7 +781,7 @@ class FLAC(FileType): insert_bytes(f, diff, header) f.seek(header - 4) - f.write("fLaC" + data) + f.write(b"fLaC" + data) # Delete ID3v1 if deleteid3: @@ -785,7 +790,7 @@ class FLAC(FileType): except IOError: pass else: - if f.read(3) == "TAG": + if f.read(3) == b"TAG": f.seek(-128, 2) f.truncate() finally: @@ -812,12 +817,12 @@ class FLAC(FileType): def __check_header(self, fileobj): size = 4 header = fileobj.read(4) - if header != "fLaC": + if header != b"fLaC": size = None - if header[:3] == "ID3": + if header[:3] == b"ID3": size = 14 + BitPaddedInt(fileobj.read(6)[2:]) fileobj.seek(size - 4) - if fileobj.read(4) != "fLaC": + if fileobj.read(4) != b"fLaC": size = None if size is None: raise FLACNoHeaderError( diff --git a/lib/mutagen/id3.py b/lib/mutagen/id3/__init__.py similarity index 67% rename from lib/mutagen/id3.py rename to lib/mutagen/id3/__init__.py index 27d30e90..bfb4a3f6 100644 --- a/lib/mutagen/id3.py +++ b/lib/mutagen/id3/__init__.py @@ -1,4 +1,5 @@ -# id3 support for mutagen +# -*- coding: utf-8 -*- + # Copyright (C) 2005 Michael Urman # 2006 Lukas Lalinsky # 2013 Christoph Reiter @@ -32,15 +33,30 @@ interested in the :class:`ID3` class to start with. 
__all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete'] import struct +import errno from struct import unpack, pack, error as StructError import mutagen -from mutagen._util import insert_bytes, delete_bytes, DictProxy +from mutagen._util import insert_bytes, delete_bytes, DictProxy, enum +from .._compat import chr_, PY3 -from mutagen._id3util import * -from mutagen._id3frames import * -from mutagen._id3specs import * +from ._util import * +from ._frames import * +from ._specs import * + + +@enum +class ID3v1SaveOptions(object): + + REMOVE = 0 + """ID3v1 tags will be removed""" + + UPDATE = 1 + """ID3v1 tags will be updated but not added""" + + CREATE = 2 + """ID3v1 tags will be created and/or updated""" class ID3(DictProxy, mutagen.Metadata): @@ -53,8 +69,11 @@ class ID3(DictProxy, mutagen.Metadata): * size -- the total size of the ID3 tag, including the header """ + __module__ = "mutagen.id3" + PEDANTIC = True version = (2, 4, 0) + """ID3 tag version as a tuple (of the loaded file)""" filename = None size = 0 @@ -73,15 +92,17 @@ class ID3(DictProxy, mutagen.Metadata): super(ID3, self).__init__(*args, **kwargs) def __fullread(self, size): + """ Read a certain number of bytes from the source file. 
""" + try: if size < 0: raise ValueError('Requested bytes (%s) less than zero' % size) if size > self.__filesize: raise EOFError('Requested %#x of %#x (%s)' % ( - long(size), long(self.__filesize), self.filename)) + int(size), int(self.__filesize), self.filename)) except AttributeError: pass - data = self.__fileobj.read(size) + data = self._fileobj.read(size) if len(data) != size: raise EOFError self.__readbytes += size @@ -107,37 +128,31 @@ class ID3(DictProxy, mutagen.Metadata): mutagen.id3.ID3(filename, known_frames=my_frames) """ - if not v2_version in (3, 4): + if v2_version not in (3, 4): raise ValueError("Only 3 and 4 possible for v2_version") from os.path import getsize self.filename = filename self.__known_frames = known_frames - self.__fileobj = open(filename, 'rb') + self._fileobj = open(filename, 'rb') self.__filesize = getsize(filename) try: try: - self.__load_header() + self._load_header() except EOFError: self.size = 0 raise ID3NoHeaderError("%s: too small (%d bytes)" % ( filename, self.__filesize)) - except (ID3NoHeaderError, ID3UnsupportedVersionError), err: + except (ID3NoHeaderError, ID3UnsupportedVersionError): self.size = 0 - import sys - stack = sys.exc_info()[2] - try: - self.__fileobj.seek(-128, 2) - except EnvironmentError: - raise err, None, stack - else: - frames = ParseID3v1(self.__fileobj.read(128)) - if frames is not None: - self.version = self._V11 - map(self.add, frames.values()) - else: - raise err, None, stack + frames, offset = _find_id3v1(self._fileobj) + if frames is None: + raise + + self.version = self._V11 + for v in frames.values(): + self.add(v) else: frames = self.__known_frames if frames is None: @@ -151,10 +166,10 @@ class ID3(DictProxy, mutagen.Metadata): self.add(frame) else: self.unknown_frames.append(frame) - self.__unknown_version = self.version + self.__unknown_version = self.version[:2] finally: - self.__fileobj.close() - del self.__fileobj + self._fileobj.close() + del self._fileobj del self.__filesize if 
translate: if v2_version == 3: @@ -188,8 +203,9 @@ class ID3(DictProxy, mutagen.Metadata): del(self[key]) else: key = key + ":" - for k in filter(lambda s: s.startswith(key), self.keys()): - del(self[k]) + for k in list(self.keys()): + if k.startswith(key): + del(self[k]) def setall(self, key, values): """Delete frames of the given type and add frames in 'values'.""" @@ -209,8 +225,7 @@ class ID3(DictProxy, mutagen.Metadata): ``POPM=user@example.org=3 128/255`` """ - frames = list(map(Frame.pprint, self.values())) - frames.sort() + frames = sorted(Frame.pprint(s) for s in self.values()) return "\n".join(frames) def loaded_frame(self, tag): @@ -227,7 +242,7 @@ class ID3(DictProxy, mutagen.Metadata): """Add a frame to the tag.""" return self.loaded_frame(frame) - def __load_header(self): + def _load_header(self): fn = self.filename data = self.__fullread(10) id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data) @@ -235,24 +250,25 @@ class ID3(DictProxy, mutagen.Metadata): self.size = BitPaddedInt(size) + 10 self.version = (2, vmaj, vrev) - if id3 != 'ID3': - raise ID3NoHeaderError("'%s' doesn't start with an ID3 tag" % fn) + if id3 != b'ID3': + raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn) if vmaj not in [2, 3, 4]: - raise ID3UnsupportedVersionError("'%s' ID3v2.%d not supported" + raise ID3UnsupportedVersionError("%r ID3v2.%d not supported" % (fn, vmaj)) if self.PEDANTIC: if not BitPaddedInt.has_valid_padding(size): raise ValueError("Header size not synchsafe") - if self._V24 <= self.version and (flags & 0x0f): - raise ValueError("'%s' has invalid flags %#02x" % (fn, flags)) - elif self._V23 <= self.version < self._V24 and (flags & 0x1f): - raise ValueError("'%s' has invalid flags %#02x" % (fn, flags)) + if (self._V24 <= self.version) and (flags & 0x0f): + raise ValueError("%r has invalid flags %#02x" % (fn, flags)) + elif (self._V23 <= self.version < self._V24) and (flags & 0x1f): + raise ValueError("%r has invalid flags %#02x" % (fn, flags)) if 
self.f_extended: extsize = self.__fullread(4) - if extsize in Frames: + frame_id = extsize.decode("ascii", "replace") if PY3 else extsize + if frame_id in Frames: # Some tagger sets the extended header flag but # doesn't write an extended header; in this case, the # ID3 data follows immediately. Since no extended @@ -263,7 +279,7 @@ class ID3(DictProxy, mutagen.Metadata): # http://code.google.com/p/quodlibet/issues/detail?id=126 self.__flags ^= 0x40 self.__extsize = 0 - self.__fileobj.seek(-4, 1) + self._fileobj.seek(-4, 1) self.__readbytes -= 4 elif self.version >= self._V24: # "Where the 'Extended header size' is the size of the whole @@ -279,49 +295,13 @@ class ID3(DictProxy, mutagen.Metadata): if self.__extsize: self.__extdata = self.__fullread(self.__extsize) else: - self.__extdata = "" + self.__extdata = b"" - def __determine_bpi(self, data, frames, EMPTY="\x00" * 10): + def __determine_bpi(self, data, frames): if self.version < self._V24: return int - # have to special case whether to use bitpaddedints here - # spec says to use them, but iTunes has it wrong - # count number of tags found as BitPaddedInt and how far past - o = 0 - asbpi = 0 - while o < len(data) - 10: - part = data[o:o + 10] - if part == EMPTY: - bpioff = -((len(data) - o) % 10) - break - name, size, flags = unpack('>4sLH', part) - size = BitPaddedInt(size) - o += 10 + size - if name in frames: - asbpi += 1 - else: - bpioff = o - len(data) - - # count number of tags found as int and how far past - o = 0 - asint = 0 - while o < len(data) - 10: - part = data[o:o + 10] - if part == EMPTY: - intoff = -((len(data) - o) % 10) - break - name, size, flags = unpack('>4sLH', part) - o += 10 + size - if name in frames: - asint += 1 - else: - intoff = o - len(data) - - # if more tags as int, or equal and bpi is past and int is not - if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)): - return int - return BitPaddedInt + return _determine_bpi(data, frames) def __read_frames(self, data, 
frames): if self.version < self._V24 and self.f_unsynch: @@ -338,14 +318,27 @@ class ID3(DictProxy, mutagen.Metadata): name, size, flags = unpack('>4sLH', header) except struct.error: return # not enough header - if name.strip('\x00') == '': + if name.strip(b'\x00') == b'': return + size = bpi(size) - framedata = data[10:10+size] - data = data[10+size:] + framedata = data[10:10 + size] + data = data[10 + size:] if size == 0: continue # drop empty frames + + if PY3: + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue + try: + # someone writes 2.3 frames with 2.2 names + if name[-1] == "\x00": + tag = Frames_2_2[name[:-1]] + name = tag.__base__.__name__ + tag = frames[name] except KeyError: if is_valid_frame_id(name): @@ -365,13 +358,21 @@ class ID3(DictProxy, mutagen.Metadata): name, size = unpack('>3s3s', header) except struct.error: return # not enough header - size, = struct.unpack('>L', '\x00'+size) - if name.strip('\x00') == '': + size, = struct.unpack('>L', b'\x00' + size) + if name.strip(b'\x00') == b'': return - framedata = data[6:6+size] - data = data[6+size:] + + framedata = data[6:6 + size] + data = data[6 + size:] if size == 0: continue # drop empty frames + + if PY3: + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue + try: tag = frames[name] except KeyError: @@ -393,29 +394,9 @@ class ID3(DictProxy, mutagen.Metadata): f_experimental = property(lambda s: bool(s.__flags & 0x20)) f_footer = property(lambda s: bool(s.__flags & 0x10)) - #f_crc = property(lambda s: bool(s.__extflags & 0x8000)) - - def save(self, filename=None, v1=1, v2_version=4, v23_sep='/'): - """Save changes to a file. - - If no filename is given, the one most recently loaded is used. - - Keyword arguments: - v1 -- if 0, ID3v1 tags will be removed - if 1, ID3v1 tags will be updated but not added - if 2, ID3v1 tags will be created and/or updated - v2 -- version of ID3v2 tags (3 or 4). - - By default Mutagen saves ID3v2.4 tags. 
If you want to save ID3v2.3 - tags, you must call method update_to_v23 before saving the file. - - v23_sep -- the separator used to join multiple text values - if v2_version == 3. Defaults to '/' but if it's None - will be the ID3v2v2.4 null separator. - - The lack of a way to update only an ID3v1 tag is intentional. - """ + # f_crc = property(lambda s: bool(s.__extflags & 0x8000)) + def _prepare_framedata(self, v2_version, v23_sep): if v2_version == 3: version = self._V23 elif v2_version == 4: @@ -425,38 +406,83 @@ class ID3(DictProxy, mutagen.Metadata): # Sort frames by 'importance' order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"] - order = dict(zip(order, range(len(order)))) + order = dict((b, a) for a, b in enumerate(order)) last = len(order) - frames = self.items() - frames.sort(lambda a, b: cmp(order.get(a[0][:4], last), - order.get(b[0][:4], last))) + frames = sorted(self.items(), + key=lambda a: (order.get(a[0][:4], last), a[0])) framedata = [self.__save_frame(frame, version=version, v23_sep=v23_sep) for (key, frame) in frames] # only write unknown frames if they were loaded from the version # we are saving with or upgraded to it - if self.__unknown_version == version: - framedata.extend([data for data in self.unknown_frames - if len(data) > 10]) + if self.__unknown_version == version[:2]: + framedata.extend(data for data in self.unknown_frames + if len(data) > 10) + + return b''.join(framedata) + + def _prepare_id3_header(self, original_header, framesize, v2_version): + try: + id3, vmaj, vrev, flags, insize = \ + unpack('>3sBBB4s', original_header) + except struct.error: + id3, insize = b'', 0 + insize = BitPaddedInt(insize) + if id3 != b'ID3': + insize = -10 + + if insize >= framesize: + outsize = insize + else: + outsize = (framesize + 1023) & ~0x3FF + + framesize = BitPaddedInt.to_str(outsize, width=4) + header = pack('>3sBBB4s', b'ID3', v2_version, 0, 0, framesize) + + return (header, outsize, insize) + + def save(self, filename=None, 
v1=1, v2_version=4, v23_sep='/'): + """Save changes to a file. + + Args: + filename: + Filename to save the tag to. If no filename is given, + the one most recently loaded is used. + v1 (ID3v1SaveOptions): + if 0, ID3v1 tags will be removed. + if 1, ID3v1 tags will be updated but not added. + if 2, ID3v1 tags will be created and/or updated + v2 (int): + version of ID3v2 tags (3 or 4). + v23_sep (str): + the separator used to join multiple text values + if v2_version == 3. Defaults to '/' but if it's None + will be the ID3v2v2.4 null separator. + + By default Mutagen saves ID3v2.4 tags. If you want to save ID3v2.3 + tags, you must call method update_to_v23 before saving the file. + + The lack of a way to update only an ID3v1 tag is intentional. + """ + + framedata = self._prepare_framedata(v2_version, v23_sep) + framesize = len(framedata) if not framedata: try: self.delete(filename) - except EnvironmentError, err: + except EnvironmentError as err: from errno import ENOENT if err.errno != ENOENT: raise return - framedata = ''.join(framedata) - framesize = len(framedata) - if filename is None: filename = self.filename try: f = open(filename, 'rb+') - except IOError, err: + except IOError as err: from errno import ENOENT if err.errno != ENOENT: raise @@ -464,61 +490,33 @@ class ID3(DictProxy, mutagen.Metadata): f = open(filename, 'rb+') try: idata = f.read(10) - try: - id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata) - except struct.error: - id3, insize = '', 0 - insize = BitPaddedInt(insize) - if id3 != 'ID3': - insize = -10 - if insize >= framesize: - outsize = insize - else: - outsize = (framesize + 1023) & ~0x3FF - framedata += '\x00' * (outsize - framesize) + header = self._prepare_id3_header(idata, framesize, v2_version) + header, outsize, insize = header - framesize = BitPaddedInt.to_str(outsize, width=4) - flags = 0 - header = pack('>3sBBB4s', 'ID3', v2_version, 0, flags, framesize) - data = header + framedata + data = header + framedata + (b'\x00' * 
(outsize - framesize)) if (insize < outsize): - insert_bytes(f, outsize-insize, insize+10) + insert_bytes(f, outsize - insize, insize + 10) f.seek(0) f.write(data) - try: - f.seek(-128, 2) - except IOError, err: - # If the file is too small, that's OK - it just means - # we're certain it doesn't have a v1 tag. - from errno import EINVAL - if err.errno != EINVAL: - # If we failed to see for some other reason, bail out. - raise - # Since we're sure this isn't a v1 tag, don't read it. - f.seek(0, 2) - - data = f.read(128) - try: - idx = data.index("TAG") - except ValueError: - offset = 0 - has_v1 = False - else: - offset = idx - len(data) - has_v1 = True - - f.seek(offset, 2) - if v1 == 1 and has_v1 or v1 == 2: - f.write(MakeID3v1(self)) - else: - f.truncate() + self.__save_v1(f, v1) finally: f.close() + def __save_v1(self, f, v1): + tag, offset = _find_id3v1(f) + has_v1 = tag is not None + + f.seek(offset, 2) + if v1 == ID3v1SaveOptions.UPDATE and has_v1 or \ + v1 == ID3v1SaveOptions.CREATE: + f.write(MakeID3v1(self)) + else: + f.truncate() + def delete(self, filename=None, delete_v1=True, delete_v2=True): """Remove tags from a file. @@ -538,7 +536,7 @@ class ID3(DictProxy, mutagen.Metadata): flags = 0 if self.PEDANTIC and isinstance(frame, TextFrame): if len(str(frame)) == 0: - return '' + return b'' if version == self._V23: framev23 = frame._get_v23_frame(sep=v23_sep) @@ -551,8 +549,8 @@ class ID3(DictProxy, mutagen.Metadata): # Disabled as this causes iTunes and other programs # to fail to find these frames, which usually includes # e.g. APIC. 
- #framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib') - #flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN + # framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib') + # flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN pass if version == self._V24: @@ -563,7 +561,16 @@ class ID3(DictProxy, mutagen.Metadata): raise ValueError datasize = BitPaddedInt.to_str(len(framedata), width=4, bits=bits) - header = pack('>4s4sH', name or type(frame).__name__, datasize, flags) + + if name is not None: + assert isinstance(name, bytes) + frame_name = name + else: + frame_name = type(frame).__name__ + if PY3: + frame_name = frame_name.encode("ascii") + + header = pack('>4s4sH', frame_name, datasize, flags) return header + framedata def __update_common(self): @@ -573,20 +580,19 @@ class ID3(DictProxy, mutagen.Metadata): # Get rid of "(xx)Foobr" format. self["TCON"].genres = self["TCON"].genres - if self.version < self._V23: - # ID3v2.2 PIC frames are slightly different. - pics = self.getall("APIC") - mimes = {"PNG": "image/png", "JPG": "image/jpeg"} - self.delall("APIC") - for pic in pics: + # ID3v2.2 LNK frames are just way too different to upgrade. + for frame in self.getall("LINK"): + if len(frame.frameid) != 4: + del self[frame.HashKey] + + mimes = {"PNG": "image/png", "JPG": "image/jpeg"} + for pic in self.getall("APIC"): + if pic.mime in mimes: newpic = APIC( - encoding=pic.encoding, mime=mimes.get(pic.mime, pic.mime), + encoding=pic.encoding, mime=mimes[pic.mime], type=pic.type, desc=pic.desc, data=pic.data) self.add(newpic) - # ID3v2.2 LNK frames are just way too different to upgrade. - self.delall("LINK") - def update_to_v24(self): """Convert older tags into an ID3v2.4 tag. 
@@ -597,7 +603,7 @@ class ID3(DictProxy, mutagen.Metadata): self.__update_common() - if self.__unknown_version == (2, 3, 0): + if self.__unknown_version == (2, 3): # convert unknown 2.3 frames (flags/size) to 2.4 converted = [] for frame in self.unknown_frames: @@ -606,19 +612,23 @@ class ID3(DictProxy, mutagen.Metadata): frame = BinaryFrame.fromData(self, flags, frame[10:]) except (struct.error, error): continue + converted.append(self.__save_frame(frame, name=name)) self.unknown_frames[:] = converted - self.__unknown_version = (2, 4, 0) + self.__unknown_version = (2, 4) # TDAT, TYER, and TIME have been turned into TDRC. try: - if str(self.get("TYER", "")).strip("\x00"): - date = str(self.pop("TYER")) - if str(self.get("TDAT", "")).strip("\x00"): - dat = str(self.pop("TDAT")) + date = text_type(self.get("TYER", "")) + if date.strip(u"\x00"): + self.pop("TYER") + dat = text_type(self.get("TDAT", "")) + if dat.strip("\x00"): + self.pop("TDAT") date = "%s-%s-%s" % (date, dat[2:], dat[:2]) - if str(self.get("TIME", "")).strip("\x00"): - time = str(self.pop("TIME")) + time = text_type(self.get("TIME", "")) + if time.strip("\x00"): + self.pop("TIME") date += "T%s:%s:00" % (time[:2], time[2:]) if "TDRC" not in self: self.add(TDRC(encoding=0, text=date)) @@ -723,14 +733,10 @@ def delete(filename, delete_v1=True, delete_v2=True): f = open(filename, 'rb+') if delete_v1: - try: - f.seek(-128, 2) - except IOError: - pass - else: - if f.read(3) == "TAG": - f.seek(-128, 2) - f.truncate() + tag, offset = _find_id3v1(f) + if tag is not None: + f.seek(offset, 2) + f.truncate() # technically an insize=0 tag is invalid, but we delete it anyway # (primarily because we used to write it) @@ -740,9 +746,9 @@ def delete(filename, delete_v1=True, delete_v2=True): try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata) except struct.error: - id3, insize = '', -1 + id3, insize = b'', -1 insize = BitPaddedInt(insize) - if id3 == 'ID3' and insize >= 0: + if id3 == b'ID3' and insize >= 
0: delete_bytes(f, insize + 10, 0) @@ -750,15 +756,116 @@ def delete(filename, delete_v1=True, delete_v2=True): Open = ID3 -# ID3v1.1 support. -def ParseID3v1(string): - """Parse an ID3v1 tag, returning a list of ID3v2.4 frames.""" +def _determine_bpi(data, frames, EMPTY=b"\x00" * 10): + """Takes id3v2.4 frame data and determines if ints or bitpaddedints + should be used for parsing. Needed because iTunes used to write + normal ints for frame sizes. + """ + + # count number of tags found as BitPaddedInt and how far past + o = 0 + asbpi = 0 + while o < len(data) - 10: + part = data[o:o + 10] + if part == EMPTY: + bpioff = -((len(data) - o) % 10) + break + name, size, flags = unpack('>4sLH', part) + size = BitPaddedInt(size) + o += 10 + size + if PY3: + try: + name = name.decode("ascii") + except UnicodeDecodeError: + continue + if name in frames: + asbpi += 1 + else: + bpioff = o - len(data) + + # count number of tags found as int and how far past + o = 0 + asint = 0 + while o < len(data) - 10: + part = data[o:o + 10] + if part == EMPTY: + intoff = -((len(data) - o) % 10) + break + name, size, flags = unpack('>4sLH', part) + o += 10 + size + if PY3: + try: + name = name.decode("ascii") + except UnicodeDecodeError: + continue + if name in frames: + asint += 1 + else: + intoff = o - len(data) + + # if more tags as int, or equal and bpi is past and int is not + if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)): + return int + return BitPaddedInt + + +def _find_id3v1(fileobj): + """Returns a tuple of (id3tag, offset_to_end) or (None, 0) + + offset mainly because we used to write too short tags in some cases and + we need the offset to delete them. 
+ """ + + # id3v1 is always at the end (after apev2) + + extra_read = b"APETAGEX".index(b"TAG") try: - string = string[string.index("TAG"):] + fileobj.seek(-128 - extra_read, 2) + except IOError as e: + if e.errno == errno.EINVAL: + # If the file is too small, might be ok since we wrote too small + # tags at some point. let's see how the parsing goes.. + fileobj.seek(0, 0) + else: + raise + + data = fileobj.read(128 + extra_read) + try: + idx = data.index(b"TAG") + except ValueError: + return (None, 0) + else: + # FIXME: make use of the apev2 parser here + # if TAG is part of APETAGEX assume this is an APEv2 tag + try: + ape_idx = data.index(b"APETAGEX") + except ValueError: + pass + else: + if idx == ape_idx + extra_read: + return (None, 0) + + tag = ParseID3v1(data[idx:]) + if tag is None: + return (None, 0) + + offset = idx - len(data) + return (tag, offset) + + +# ID3v1.1 support. +def ParseID3v1(data): + """Parse an ID3v1 tag, returning a list of ID3v2.4 frames. + + Returns a {frame_name: frame} dict or None. + """ + + try: + data = data[data.index(b"TAG"):] except ValueError: return None - if 128 < len(string) or len(string) < 124: + if 128 < len(data) or len(data) < 124: return None # Issue #69 - Previous versions of Mutagen, when encountering @@ -766,19 +873,19 @@ def ParseID3v1(string): # wrote only the characters available - e.g. "1" or "" - into the # year field. To parse those, reduce the size of the year field. # Amazingly, "0s" works as a struct format string. 
- unpack_fmt = "3s30s30s30s%ds29sBB" % (len(string) - 124) + unpack_fmt = "3s30s30s30s%ds29sBB" % (len(data) - 124) try: tag, title, artist, album, year, comment, track, genre = unpack( - unpack_fmt, string) + unpack_fmt, data) except StructError: return None - if tag != "TAG": + if tag != b"TAG": return None - def fix(string): - return string.split("\x00")[0].strip().decode('latin1') + def fix(data): + return data.split(b"\x00")[0].strip().decode('latin1') title, artist, album, year, comment = map( fix, [title, artist, album, year, comment]) @@ -797,7 +904,7 @@ def ParseID3v1(string): encoding=0, lang="eng", desc="ID3v1 Comment", text=comment) # Don't read a track number if it looks like the comment was # padded with spaces instead of nulls (thanks, WinAmp). - if track and (track != 32 or string[-3] == '\x00'): + if track and ((track != 32) or (data[-3] == b'\x00'[0])): frames["TRCK"] = TRCK(encoding=0, text=str(track)) if genre != 255: frames["TCON"] = TCON(encoding=0, text=str(genre)) @@ -814,22 +921,22 @@ def MakeID3v1(id3): if v2id in id3: text = id3[v2id].text[0].encode('latin1', 'replace')[:30] else: - text = "" - v1[name] = text + ("\x00" * (30 - len(text))) + text = b"" + v1[name] = text + (b"\x00" * (30 - len(text))) if "COMM" in id3: cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28] else: - cmnt = "" - v1["comment"] = cmnt + ("\x00" * (29 - len(cmnt))) + cmnt = b"" + v1["comment"] = cmnt + (b"\x00" * (29 - len(cmnt))) if "TRCK" in id3: try: - v1["track"] = chr(+id3["TRCK"]) + v1["track"] = chr_(+id3["TRCK"]) except ValueError: - v1["track"] = "\x00" + v1["track"] = b"\x00" else: - v1["track"] = "\x00" + v1["track"] = b"\x00" if "TCON" in id3: try: @@ -838,20 +945,28 @@ def MakeID3v1(id3): pass else: if genre in TCON.GENRES: - v1["genre"] = chr(TCON.GENRES.index(genre)) + v1["genre"] = chr_(TCON.GENRES.index(genre)) if "genre" not in v1: - v1["genre"] = "\xff" + v1["genre"] = b"\xff" if "TDRC" in id3: - year = str(id3["TDRC"]) + year = 
text_type(id3["TDRC"]).encode('ascii') elif "TYER" in id3: - year = str(id3["TYER"]) + year = text_type(id3["TYER"]).encode('ascii') else: - year = "" - v1["year"] = (year + "\x00\x00\x00\x00")[:4] + year = b"" + v1["year"] = (year + b"\x00\x00\x00\x00")[:4] - return ("TAG%(title)s%(artist)s%(album)s%(year)s%(comment)s" - "%(track)s%(genre)s") % v1 + return ( + b"TAG" + + v1["title"] + + v1["artist"] + + v1["album"] + + v1["year"] + + v1["comment"] + + v1["track"] + + v1["genre"] + ) class ID3FileType(mutagen.FileType): @@ -859,7 +974,7 @@ class ID3FileType(mutagen.FileType): ID3 = ID3 - class _Info(object): + class _Info(mutagen.StreamInfo): length = 0 def __init__(self, fileobj, offset): @@ -870,8 +985,8 @@ class ID3FileType(mutagen.FileType): return "Unknown format with ID3 tag" @staticmethod - def score(filename, fileobj, header): - return header.startswith("ID3") + def score(filename, fileobj, header_data): + return header_data.startswith(b"ID3") def add_tags(self, ID3=None): """Add an empty ID3 tag to the file. diff --git a/lib/mutagen/_id3frames.py b/lib/mutagen/id3/_frames.py similarity index 87% rename from lib/mutagen/_id3frames.py rename to lib/mutagen/id3/_frames.py index c6130f6b..75bfee53 100644 --- a/lib/mutagen/_id3frames.py +++ b/lib/mutagen/id3/_frames.py @@ -1,28 +1,37 @@ +# -*- coding: utf-8 -*- + # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
-from zlib import error as zlibError +import zlib from warnings import warn from struct import unpack -from mutagen._id3util import ( +from ._util import ( ID3Warning, ID3JunkFrameError, ID3BadCompressedData, ID3EncryptionUnsupportedError, ID3BadUnsynchData, unsynch) -from mutagen._id3specs import ( +from ._specs import ( BinaryDataSpec, StringSpec, Latin1TextSpec, EncodedTextSpec, ByteSpec, EncodingSpec, ASPIIndexSpec, SizedIntegerSpec, IntegerSpec, VolumeAdjustmentsSpec, VolumePeakSpec, VolumeAdjustmentSpec, ChannelSpec, MultiSpec, SynchronizedTextSpec, KeyEventSpec, TimeStampSpec, EncodedNumericPartTextSpec, EncodedNumericTextSpec) +from .._compat import text_type, string_types, swap_to_string, iteritems def is_valid_frame_id(frame_id): return frame_id.isalnum() and frame_id.isupper() +def _bytes2key(b): + assert isinstance(b, bytes) + + return b.decode("latin1") + + class Frame(object): """Fundamental unit of ID3 data. @@ -52,13 +61,8 @@ class Frame(object): if len(args) == 1 and len(kwargs) == 0 and \ isinstance(args[0], type(self)): other = args[0] - for checker in self._framespec: - try: - val = checker.validate(self, getattr(other, checker.name)) - except ValueError as e: - e.message = "%s: %s" % (checker.name, e.message) - raise - setattr(self, checker.name, val) + # ask the sub class to fill in our data + other._to_other(self) else: for checker, val in zip(self._framespec, args): setattr(self, checker.name, checker.validate(self, val)) @@ -67,10 +71,17 @@ class Frame(object): validated = checker.validate( self, kwargs.get(checker.name, None)) except ValueError as e: - e.message = "%s: %s" % (checker.name, e.message) - raise + raise ValueError("%s: %s" % (checker.name, e)) setattr(self, checker.name, validated) + def _to_other(self, other): + # this impl covers subclasses with the same framespec + if other._framespec is not self._framespec: + raise ValueError + + for checker in other._framespec: + setattr(other, checker.name, getattr(self, checker.name)) + 
def _get_v23_frame(self, **kwargs): """Returns a frame copy which is suitable for writing into a v2.3 tag. @@ -104,7 +115,9 @@ class Frame(object): """ kw = [] for attr in self._framespec: - kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) + # so repr works during __init__ + if hasattr(self, attr.name): + kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) return '%s(%s)' % (type(self).__name__, ', '.join(kw)) def _readData(self, data): @@ -118,7 +131,7 @@ class Frame(object): else: raise ID3JunkFrameError setattr(self, reader.name, value) - if data.strip('\x00'): + if data.strip(b'\x00'): warn('Leftover data: %s: %r (from %r)' % ( type(self).__name__, data, odata), ID3Warning) @@ -127,7 +140,7 @@ class Frame(object): data = [] for writer in self._framespec: data.append(writer.write(self, getattr(self, writer.name))) - return ''.join(data) + return b''.join(data) def pprint(self): """Return a human-readable representation of the frame.""" @@ -151,21 +164,21 @@ class Frame(object): if tflags & Frame.FLAG24_UNSYNCH or id3.f_unsynch: try: data = unsynch.decode(data) - except ValueError, err: + except ValueError as err: if id3.PEDANTIC: raise ID3BadUnsynchData('%s: %r' % (err, data)) if tflags & Frame.FLAG24_ENCRYPT: raise ID3EncryptionUnsupportedError if tflags & Frame.FLAG24_COMPRESS: try: - data = data.decode('zlib') - except zlibError, err: + data = zlib.decompress(data) + except zlib.error as err: # the initial mutagen that went out with QL 0.12 did not # write the 4 bytes of uncompressed size. Compensate. 
data = datalen_bytes + data try: - data = data.decode('zlib') - except zlibError, err: + data = zlib.decompress(data) + except zlib.error as err: if id3.PEDANTIC: raise ID3BadCompressedData('%s: %r' % (err, data)) @@ -177,8 +190,8 @@ class Frame(object): raise ID3EncryptionUnsupportedError if tflags & Frame.FLAG23_COMPRESS: try: - data = data.decode('zlib') - except zlibError, err: + data = zlib.decompress(data) + except zlib.error as err: if id3.PEDANTIC: raise ID3BadCompressedData('%s: %r' % (err, data)) @@ -210,6 +223,17 @@ class FrameOpt(Frame): else: break + def _to_other(self, other): + super(FrameOpt, self)._to_other(other) + + # this impl covers subclasses with the same optionalspec + if other._optionalspec is not self._optionalspec: + raise ValueError + + for checker in other._optionalspec: + if hasattr(self, checker.name): + setattr(other, checker.name, getattr(self, checker.name)) + def _readData(self, data): odata = data for reader in self._framespec: @@ -225,7 +249,7 @@ class FrameOpt(Frame): else: break setattr(self, reader.name, value) - if data.strip('\x00'): + if data.strip(b'\x00'): warn('Leftover data: %s: %r (from %r)' % ( type(self).__name__, data, odata), ID3Warning) @@ -239,7 +263,7 @@ class FrameOpt(Frame): data.append(writer.write(self, getattr(self, writer.name))) except AttributeError: break - return ''.join(data) + return b''.join(data) def __repr__(self): kw = [] @@ -251,6 +275,7 @@ class FrameOpt(Frame): return '%s(%s)' % (type(self).__name__, ', '.join(kw)) +@swap_to_string class TextFrame(Frame): """Text strings. 
@@ -271,17 +296,17 @@ class TextFrame(Frame): MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'), ] - def __str__(self): - return self.__unicode__().encode('utf-8') + def __bytes__(self): + return text_type(self).encode('utf-8') - def __unicode__(self): + def __str__(self): return u'\u0000'.join(self.text) def __eq__(self, other): - if isinstance(other, str): - return str(self) == other - elif isinstance(other, unicode): - return unicode(self) == other + if isinstance(other, bytes): + return bytes(self) == other + elif isinstance(other, text_type): + return text_type(self) == other return self.text == other __hash__ = Frame.__hash__ @@ -344,6 +369,7 @@ class NumericPartTextFrame(TextFrame): return int(self.text[0].split("/")[0]) +@swap_to_string class TimeStampTextFrame(TextFrame): """A list of time stamps. @@ -356,16 +382,17 @@ class TimeStampTextFrame(TextFrame): MultiSpec('text', TimeStampSpec('stamp'), sep=u','), ] - def __str__(self): - return self.__unicode__().encode('utf-8') + def __bytes__(self): + return text_type(self).encode('utf-8') - def __unicode__(self): - return ','.join([stamp.text for stamp in self.text]) + def __str__(self): + return u','.join([stamp.text for stamp in self.text]) def _pprint(self): - return " / ".join([stamp.text for stamp in self.text]) + return u" / ".join([stamp.text for stamp in self.text]) +@swap_to_string class UrlFrame(Frame): """A frame containing a URL string. 
@@ -380,10 +407,10 @@ class UrlFrame(Frame): _framespec = [Latin1TextSpec('url')] - def __str__(self): + def __bytes__(self): return self.url.encode('utf-8') - def __unicode__(self): + def __str__(self): return self.url def __eq__(self, other): @@ -446,7 +473,7 @@ class TCON(TextFrame): if genreid: for gid in genreid[1:-1].split(")("): if gid.isdigit() and int(gid) < len(self.GENRES): - gid = unicode(self.GENRES[int(gid)]) + gid = text_type(self.GENRES[int(gid)]) newgenres.append(gid) elif gid == "CR": newgenres.append(u"Cover") @@ -467,12 +494,12 @@ class TCON(TextFrame): return genres def __set_genres(self, genres): - if isinstance(genres, basestring): + if isinstance(genres, string_types): genres = [genres] - self.text = map(self.__decode, genres) + self.text = [self.__decode(g) for g in genres] def __decode(self, value): - if isinstance(value, str): + if isinstance(value, bytes): enc = EncodedTextSpec._encodings[self.encoding][0] return value.decode(enc) else: @@ -869,6 +896,7 @@ class SYTC(Frame): __hash__ = Frame.__hash__ +@swap_to_string class USLT(Frame): """Unsynchronised lyrics/text transcription. 
@@ -885,12 +913,12 @@ class USLT(Frame): @property def HashKey(self): - return '%s:%s:%r' % (self.FrameID, self.desc, self.lang) + return '%s:%s:%s' % (self.FrameID, self.desc, self.lang) - def __str__(self): + def __bytes__(self): return self.text.encode('utf-8') - def __unicode__(self): + def __str__(self): return self.text def __eq__(self, other): @@ -899,6 +927,7 @@ class USLT(Frame): __hash__ = Frame.__hash__ +@swap_to_string class SYLT(Frame): """Synchronised lyrics/text.""" @@ -913,7 +942,7 @@ class SYLT(Frame): @property def HashKey(self): - return '%s:%s:%r' % (self.FrameID, self.desc, self.lang) + return '%s:%s:%s' % (self.FrameID, self.desc, self.lang) def __eq__(self, other): return str(self) == other @@ -921,7 +950,10 @@ class SYLT(Frame): __hash__ = Frame.__hash__ def __str__(self): - return "".join([text for (text, time) in self.text]).encode('utf-8') + return u"".join(text for (text, time) in self.text) + + def __bytes__(self): + return text_type(self).encode("utf-8") class COMM(TextFrame): @@ -940,10 +972,10 @@ class COMM(TextFrame): @property def HashKey(self): - return '%s:%s:%r' % (self.FrameID, self.desc, self.lang) + return '%s:%s:%s' % (self.FrameID, self.desc, self.lang) def _pprint(self): - return "%s=%r=%s" % (self.desc, self.lang, " / ".join(self.text)) + return "%s=%s=%s" % (self.desc, self.lang, " / ".join(self.text)) class RVA2(Frame): @@ -1077,6 +1109,12 @@ class APIC(Frame): def HashKey(self): return '%s:%s' % (self.FrameID, self.desc) + def _validate_from_22(self, other, checker): + if checker.name == "mime": + self.mime = other.mime.decode("ascii", "ignore") + else: + super(APIC, self)._validate_from_22(other, checker) + def _pprint(self): return "%s (%s, %d bytes)" % ( self.desc, self.mime, len(self.data)) @@ -1102,7 +1140,7 @@ class PCNT(Frame): return self.count def _pprint(self): - return unicode(self.count) + return text_type(self.count) class POPM(FrameOpt): @@ -1202,6 +1240,7 @@ class RBUF(FrameOpt): return self.size 
+@swap_to_string class AENC(FrameOpt): """Audio encryption. @@ -1227,10 +1266,10 @@ class AENC(FrameOpt): def HashKey(self): return '%s:%s' % (self.FrameID, self.owner) - def __str__(self): + def __bytes__(self): return self.owner.encode('utf-8') - def __unicode__(self): + def __str__(self): return self.owner def __eq__(self, other): @@ -1259,8 +1298,8 @@ class LINK(FrameOpt): @property def HashKey(self): try: - return "%s:%s:%s:%r" % ( - self.FrameID, self.frameid, self.url, self.data) + return "%s:%s:%s:%s" % ( + self.FrameID, self.frameid, self.url, _bytes2key(self.data)) except AttributeError: return "%s:%s:%s" % (self.FrameID, self.frameid, self.url) @@ -1323,13 +1362,10 @@ class UFID(Frame): __hash__ = Frame.__hash__ def _pprint(self): - isascii = ord(max(self.data)) < 128 - if isascii: - return "%s=%s" % (self.owner, self.data) - else: - return "%s (%d bytes)" % (self.owner, len(self.data)) + return "%s=%r" % (self.owner, self.data) +@swap_to_string class USER(Frame): """Terms of use. 
@@ -1348,12 +1384,12 @@ class USER(Frame): @property def HashKey(self): - return '%s:%r' % (self.FrameID, self.lang) + return '%s:%s' % (self.FrameID, self.lang) - def __str__(self): + def __bytes__(self): return self.text.encode('utf-8') - def __unicode__(self): + def __str__(self): return self.text def __eq__(self, other): @@ -1365,6 +1401,7 @@ class USER(Frame): return "%r=%s" % (self.lang, self.text) +@swap_to_string class OWNE(Frame): """Ownership frame.""" @@ -1375,10 +1412,10 @@ class OWNE(Frame): EncodedTextSpec('seller'), ] - def __str__(self): + def __bytes__(self): return self.seller.encode('utf-8') - def __unicode__(self): + def __str__(self): return self.seller def __eq__(self, other): @@ -1407,7 +1444,7 @@ class COMR(FrameOpt): @property def HashKey(self): - return '%s:%s' % (self.FrameID, self._writeData()) + return '%s:%s' % (self.FrameID, _bytes2key(self._writeData())) def __eq__(self, other): return self._writeData() == other._writeData() @@ -1415,6 +1452,7 @@ class COMR(FrameOpt): __hash__ = FrameOpt.__hash__ +@swap_to_string class ENCR(Frame): """Encryption method registration. 
@@ -1432,7 +1470,7 @@ class ENCR(Frame): def HashKey(self): return "%s:%s" % (self.FrameID, self.owner) - def __str__(self): + def __bytes__(self): return self.data def __eq__(self, other): @@ -1441,6 +1479,7 @@ class ENCR(Frame): __hash__ = Frame.__hash__ +@swap_to_string class GRID(FrameOpt): """Group identification registration.""" @@ -1458,10 +1497,10 @@ class GRID(FrameOpt): def __pos__(self): return self.group - def __str__(self): + def __bytes__(self): return self.owner.encode('utf-8') - def __unicode__(self): + def __str__(self): return self.owner def __eq__(self, other): @@ -1470,6 +1509,7 @@ class GRID(FrameOpt): __hash__ = FrameOpt.__hash__ +@swap_to_string class PRIV(Frame): """Private frame.""" @@ -1481,24 +1521,21 @@ class PRIV(Frame): @property def HashKey(self): return '%s:%s:%s' % ( - self.FrameID, self.owner, self.data.decode('latin1')) + self.FrameID, self.owner, _bytes2key(self.data)) - def __str__(self): + def __bytes__(self): return self.data def __eq__(self, other): return self.data == other def _pprint(self): - isascii = ord(max(self.data)) < 128 - if isascii: - return "%s=%s" % (self.owner, self.data) - else: - return "%s (%d bytes)" % (self.owner, len(self.data)) + return "%s=%r" % (self.owner, self.data) __hash__ = Frame.__hash__ +@swap_to_string class SIGN(Frame): """Signature frame.""" @@ -1509,9 +1546,9 @@ class SIGN(Frame): @property def HashKey(self): - return '%s:%c:%s' % (self.FrameID, self.group, self.sig) + return '%s:%s:%s' % (self.FrameID, self.group, _bytes2key(self.sig)) - def __str__(self): + def __bytes__(self): return self.sig def __eq__(self, other): @@ -1557,15 +1594,6 @@ class ASPI(Frame): __hash__ = Frame.__hash__ -Frames = dict([(k, v) for (k, v) in globals().items() - if len(k) == 4 and isinstance(v, type) and - issubclass(v, Frame)]) -"""All supported ID3v2 frames, keyed by frame name.""" - -del(k) -del(v) - - # ID3v2.2 frames class UFI(UFID): "Unique File Identifier" @@ -1779,8 +1807,8 @@ class COM(COMM): "Comment" 
-#class RVA(RVAD) -#class EQU(EQUA) +# class RVA(RVAD) +# class EQU(EQUA) class REV(RVRB): @@ -1793,9 +1821,24 @@ class PIC(APIC): The 'mime' attribute of an ID3v2.2 attached picture must be either 'PNG' or 'JPG'. """ - _framespec = [EncodingSpec('encoding'), StringSpec('mime', 3), - ByteSpec('type'), EncodedTextSpec('desc'), - BinaryDataSpec('data')] + + _framespec = [ + EncodingSpec('encoding'), + StringSpec('mime', 3), + ByteSpec('type'), + EncodedTextSpec('desc'), + BinaryDataSpec('data') + ] + + def _to_other(self, other): + if not isinstance(other, APIC): + raise TypeError + + other.encoding = self.encoding + other.mime = self.mime + other.type = self.type + other.desc = self.desc + other.data = self.data class GEO(GEOB): @@ -1830,13 +1873,44 @@ class CRA(AENC): class LNK(LINK): """Linked information""" - _framespec = [StringSpec('frameid', 3), Latin1TextSpec('url')] + + _framespec = [ + StringSpec('frameid', 3), + Latin1TextSpec('url') + ] + _optionalspec = [BinaryDataSpec('data')] + def _to_other(self, other): + if not isinstance(other, LINK): + raise TypeError -Frames_2_2 = dict([(k, v) for (k, v) in globals().items() - if len(k) == 3 and isinstance(v, type) and - issubclass(v, Frame)]) + other.frameid = self.frameid + other.url = self.url + if hasattr(self, "data"): + other.data = self.data -del k -del v + +Frames = {} +"""All supported ID3v2.3/4 frames, keyed by frame name.""" + + +Frames_2_2 = {} +"""All supported ID3v2.2 frames, keyed by frame name.""" + + +k, v = None, None +for k, v in iteritems(globals()): + if isinstance(v, type) and issubclass(v, Frame): + v.__module__ = "mutagen.id3" + + if len(k) == 3: + Frames_2_2[k] = v + elif len(k) == 4: + Frames[k] = v + +try: + del k + del v +except NameError: + pass diff --git a/lib/mutagen/_id3specs.py b/lib/mutagen/id3/_specs.py similarity index 69% rename from lib/mutagen/_id3specs.py rename to lib/mutagen/id3/_specs.py index 32ef3afe..351d5319 100644 --- a/lib/mutagen/_id3specs.py +++ 
b/lib/mutagen/id3/_specs.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + # Copyright (C) 2005 Michael Urman # # This program is free software; you can redistribute it and/or modify @@ -8,10 +10,13 @@ import struct from struct import unpack, pack from warnings import warn -from mutagen._id3util import ID3JunkFrameError, ID3Warning, BitPaddedInt +from .._compat import text_type, chr_, PY3, swap_to_string, string_types +from .._util import total_ordering, decode_terminated, enum +from ._util import ID3JunkFrameError, ID3Warning, BitPaddedInt class Spec(object): + def __init__(self, name): self.name = name @@ -25,23 +30,34 @@ class Spec(object): return value + def read(self, frame, value): + raise NotImplementedError + + def write(self, frame, value): + raise NotImplementedError + + def validate(self, frame, value): + """Returns the validated data or raises ValueError/TypeError""" + + raise NotImplementedError + class ByteSpec(Spec): def read(self, frame, data): - return ord(data[0]), data[1:] + return bytearray(data)[0], data[1:] def write(self, frame, value): - return chr(value) + return chr_(value) def validate(self, frame, value): if value is not None: - chr(value) + chr_(value) return value class IntegerSpec(Spec): def read(self, frame, data): - return int(BitPaddedInt(data, bits=8)), '' + return int(BitPaddedInt(data, bits=8)), b'' def write(self, frame, value): return BitPaddedInt.to_str(value, bits=8, width=-1) @@ -64,19 +80,27 @@ class SizedIntegerSpec(Spec): return value +@enum +class Encoding(object): + LATIN1 = 0 + UTF16 = 1 + UTF16BE = 2 + UTF8 = 3 + + class EncodingSpec(ByteSpec): def read(self, frame, data): enc, data = super(EncodingSpec, self).read(frame, data) if enc < 16: return enc, data else: - return 0, chr(enc)+data + return 0, chr_(enc) + data def validate(self, frame, value): - if 0 <= value <= 3: - return value if value is None: return None + if 0 <= value <= 3: + return value raise ValueError('Invalid Encoding: %r' % value) def 
_validate23(self, frame, value, **kwargs): @@ -85,36 +109,73 @@ class EncodingSpec(ByteSpec): class StringSpec(Spec): + """A fixed size ASCII only payload.""" + def __init__(self, name, length): super(StringSpec, self).__init__(name) self.len = length def read(s, frame, data): - return data[:s.len], data[s.len:] + chunk = data[:s.len] + try: + ascii = chunk.decode("ascii") + except UnicodeDecodeError: + raise ID3JunkFrameError("not ascii") + else: + if PY3: + chunk = ascii + + return chunk, data[s.len:] def write(s, frame, value): if value is None: - return '\x00' * s.len + return b'\x00' * s.len else: - return (str(value) + '\x00' * s.len)[:s.len] + if PY3: + value = value.encode("ascii") + return (bytes(value) + b'\x00' * s.len)[:s.len] def validate(s, frame, value): if value is None: return None - if isinstance(value, basestring) and len(value) == s.len: + + if PY3: + if not isinstance(value, str): + raise TypeError("%s has to be str" % s.name) + value.encode("ascii") + else: + if not isinstance(value, bytes): + value = value.encode("ascii") + + if len(value) == s.len: return value + raise ValueError('Invalid StringSpec[%d] data: %r' % (s.len, value)) class BinaryDataSpec(Spec): def read(self, frame, data): - return data, '' + return data, b'' def write(self, frame, value): - return str(value) + if value is None: + return b"" + if isinstance(value, bytes): + return value + value = text_type(value).encode("ascii") + return value def validate(self, frame, value): - return str(value) + if value is None: + return None + + if isinstance(value, bytes): + return value + elif PY3: + raise TypeError("%s has to be bytes" % self.name) + + value = text_type(value).encode("ascii") + return value class EncodedTextSpec(Spec): @@ -122,40 +183,34 @@ class EncodedTextSpec(Spec): # completely by the ID3 specification. You can't just add # encodings here however you want. 
_encodings = ( - ('latin1', '\x00'), - ('utf16', '\x00\x00'), - ('utf_16_be', '\x00\x00'), - ('utf8', '\x00') + ('latin1', b'\x00'), + ('utf16', b'\x00\x00'), + ('utf_16_be', b'\x00\x00'), + ('utf8', b'\x00') ) def read(self, frame, data): enc, term = self._encodings[frame.encoding] - ret = '' - if len(term) == 1: - if term in data: - data, ret = data.split(term, 1) - else: - offset = -1 - try: - while True: - offset = data.index(term, offset+1) - if offset & 1: - continue - data, ret = data[0:offset], data[offset+2:] - break - except ValueError: - pass + try: + # allow missing termination + return decode_terminated(data, enc, strict=False) + except ValueError: + # utf-16 termination with missing BOM, or single NULL + if not data[:len(term)].strip(b"\x00"): + return u"", data[len(term):] - if len(data) < len(term): - return u'', ret - return data.decode(enc), ret + # utf-16 data with single NULL, see issue 169 + try: + return decode_terminated(data + b"\x00", enc) + except ValueError: + raise ID3JunkFrameError def write(self, frame, value): enc, term = self._encodings[frame.encoding] return value.encode(enc) + term def validate(self, frame, value): - return unicode(value) + return text_type(value) class MultiSpec(Spec): @@ -186,12 +241,12 @@ class MultiSpec(Spec): for record in value: for v, s in zip(record, self.specs): data.append(s.write(frame, v)) - return ''.join(data) + return b''.join(data) def validate(self, frame, value): if value is None: return [] - if self.sep and isinstance(value, basestring): + if self.sep and isinstance(value, string_types): value = value.split(self.sep) if isinstance(value, list): if len(self.specs) == 1: @@ -232,19 +287,21 @@ class EncodedNumericPartTextSpec(EncodedTextSpec): class Latin1TextSpec(EncodedTextSpec): def read(self, frame, data): - if '\x00' in data: - data, ret = data.split('\x00', 1) + if b'\x00' in data: + data, ret = data.split(b'\x00', 1) else: - ret = '' + ret = b'' return data.decode('latin1'), ret def 
write(self, data, value): - return value.encode('latin1') + '\x00' + return value.encode('latin1') + b'\x00' def validate(self, frame, value): - return unicode(value) + return text_type(value) +@swap_to_string +@total_ordering class ID3TimeStamp(object): """A time stamp in ID3v2 format. @@ -261,6 +318,11 @@ class ID3TimeStamp(object): def __init__(self, text): if isinstance(text, ID3TimeStamp): text = text.text + elif not isinstance(text, text_type): + if PY3: + raise TypeError("not a str") + text = text.decode("utf-8") + self.text = text __formats = ['%04d'] + ['%02d'] * 5 @@ -270,7 +332,9 @@ class ID3TimeStamp(object): parts = [self.year, self.month, self.day, self.hour, self.minute, self.second] pieces = [] - for i, part in enumerate(iter(iter(parts).next, None)): + for i, part in enumerate(parts): + if part is None: + break pieces.append(self.__formats[i] % part + self.__seps[i]) return u''.join(pieces)[:-1] @@ -289,11 +353,17 @@ class ID3TimeStamp(object): def __str__(self): return self.text + def __bytes__(self): + return self.text.encode("utf-8") + def __repr__(self): return repr(self.text) - def __cmp__(self, other): - return cmp(self.text, other.text) + def __eq__(self, other): + return self.text == other.text + + def __lt__(self, other): + return self.text < other.text __hash__ = object.__hash__ @@ -325,10 +395,14 @@ class ChannelSpec(ByteSpec): class VolumeAdjustmentSpec(Spec): def read(self, frame, data): value, = unpack('>h', data[0:2]) - return value/512.0, data[2:] + return value / 512.0, data[2:] def write(self, frame, value): - return pack('>h', int(round(value * 512))) + number = int(round(value * 512)) + # pack only fails in 2.7, do it manually in 2.6 + if not -32768 <= number <= 32767: + raise struct.error + return pack('>h', number) def validate(self, frame, value): if value is not None: @@ -343,21 +417,26 @@ class VolumePeakSpec(Spec): def read(self, frame, data): # http://bugs.xmms.org/attachment.cgi?id=113&action=view peak = 0 - bits = 
ord(data[0]) - bytes = min(4, (bits + 7) >> 3) + data_array = bytearray(data) + bits = data_array[0] + vol_bytes = min(4, (bits + 7) >> 3) # not enough frame data - if bytes + 1 > len(data): + if vol_bytes + 1 > len(data): raise ID3JunkFrameError - shift = ((8 - (bits & 7)) & 7) + (4 - bytes) * 8 - for i in range(1, bytes+1): + shift = ((8 - (bits & 7)) & 7) + (4 - vol_bytes) * 8 + for i in range(1, vol_bytes + 1): peak *= 256 - peak += ord(data[i]) + peak += data_array[i] peak *= 2 ** shift - return (float(peak) / (2**31-1)), data[1+bytes:] + return (float(peak) / (2 ** 31 - 1)), data[1 + vol_bytes:] def write(self, frame, value): + number = int(round(value * 32768)) + # pack only fails in 2.7, do it manually in 2.6 + if not 0 <= number <= 65535: + raise struct.error # always write as 16 bits for sanity. - return "\x10" + pack('>H', int(round(value * 32768))) + return b"\x10" + pack('>H', number) def validate(self, frame, value): if value is not None: @@ -373,26 +452,26 @@ class SynchronizedTextSpec(EncodedTextSpec): texts = [] encoding, term = self._encodings[frame.encoding] while data: - l = len(term) try: - value_idx = data.index(term) + value, data = decode_terminated(data, encoding) except ValueError: raise ID3JunkFrameError - value = data[:value_idx].decode(encoding) - if len(data) < value_idx + l + 4: + + if len(data) < 4: raise ID3JunkFrameError - time, = struct.unpack(">I", data[value_idx+l:value_idx+l+4]) + time, = struct.unpack(">I", data[:4]) + texts.append((value, time)) - data = data[value_idx+l+4:] - return texts, "" + data = data[4:] + return texts, b"" def write(self, frame, value): data = [] encoding, term = self._encodings[frame.encoding] - for text, time in frame.text: + for text, time in value: text = text.encode(encoding) + term data.append(text + struct.pack(">I", time)) - return "".join(data) + return b"".join(data) def validate(self, frame, value): return value @@ -407,7 +486,7 @@ class KeyEventSpec(Spec): return events, data def 
write(self, frame, value): - return "".join([struct.pack(">bI", *event) for event in value]) + return b"".join(struct.pack(">bI", *event) for event in value) def validate(self, frame, value): return value @@ -423,14 +502,13 @@ class VolumeAdjustmentsSpec(Spec): freq /= 2.0 adj /= 512.0 adjustments[freq] = adj - adjustments = adjustments.items() - adjustments.sort() + adjustments = sorted(adjustments.items()) return adjustments, data def write(self, frame, value): value.sort() - return "".join([struct.pack(">Hh", int(freq * 2), int(adj * 512)) - for (freq, adj) in value]) + return b"".join(struct.pack(">Hh", int(freq * 2), int(adj * 512)) + for (freq, adj) in value) def validate(self, frame, value): return value diff --git a/lib/mutagen/_id3util.py b/lib/mutagen/id3/_util.py similarity index 65% rename from lib/mutagen/_id3util.py rename to lib/mutagen/id3/_util.py index de82e36a..363953e7 100644 --- a/lib/mutagen/_id3util.py +++ b/lib/mutagen/id3/_util.py @@ -1,12 +1,18 @@ +# -*- coding: utf-8 -*- + # Copyright (C) 2005 Michael Urman # 2013 Christoph Reiter +# 2014 Ben Ockmore # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. 
+from .._compat import long_, integer_types +from .._util import MutagenError -class error(Exception): + +class error(MutagenError): pass @@ -45,43 +51,26 @@ class ID3Warning(error, UserWarning): class unsynch(object): @staticmethod def decode(value): - output = [] - safe = True - append = output.append - for val in value: - if safe: - append(val) - safe = val != '\xFF' - else: - if val >= '\xE0': - raise ValueError('invalid sync-safe string') - elif val != '\x00': - append(val) - safe = True - if not safe: + fragments = bytearray(value).split(b'\xff') + if len(fragments) > 1 and not fragments[-1]: raise ValueError('string ended unsafe') - return ''.join(output) + + for f in fragments[1:]: + if (not f) or (f[0] >= 0xE0): + raise ValueError('invalid sync-safe string') + + if f[0] == 0x00: + del f[0] + + return bytes(bytearray(b'\xff').join(fragments)) @staticmethod def encode(value): - output = [] - safe = True - append = output.append - for val in value: - if safe: - append(val) - if val == '\xFF': - safe = False - elif val == '\x00' or val >= '\xE0': - append('\x00') - append(val) - safe = val != '\xFF' - else: - append(val) - safe = True - if not safe: - append('\x00') - return ''.join(output) + fragments = bytearray(value).split(b'\xff') + for f in fragments[1:]: + if (not f) or (f[0] >= 0xE0) or (f[0] == 0x00): + f.insert(0, 0x00) + return bytes(bytearray(b'\xff').join(fragments)) class _BitPaddedMixin(object): @@ -111,11 +100,11 @@ class _BitPaddedMixin(object): while value: append(value & mask) value >>= bits - bytes_ = bytes_.ljust(minwidth, "\x00") + bytes_ = bytes_.ljust(minwidth, b"\x00") if bigendian: bytes_.reverse() - return str(bytes_) + return bytes(bytes_) @staticmethod def has_valid_padding(value, bits=7): @@ -125,14 +114,14 @@ class _BitPaddedMixin(object): mask = (((1 << (8 - bits)) - 1) << bits) - if isinstance(value, (int, long)): + if isinstance(value, integer_types): while value: if value & mask: return False value >>= 8 - elif 
isinstance(value, str): - for byte in value: - if ord(byte) & mask: + elif isinstance(value, bytes): + for byte in bytearray(value): + if byte & mask: return False else: raise TypeError @@ -148,29 +137,29 @@ class BitPaddedInt(int, _BitPaddedMixin): numeric_value = 0 shift = 0 - if isinstance(value, (int, long)): + if isinstance(value, integer_types): while value: numeric_value += (value & mask) << shift value >>= 8 shift += bits - elif isinstance(value, str): + elif isinstance(value, bytes): if bigendian: value = reversed(value) - for byte in value: - numeric_value += (ord(byte) & mask) << shift + for byte in bytearray(value): + numeric_value += (byte & mask) << shift shift += bits else: raise TypeError - if isinstance(numeric_value, long): - self = long.__new__(BitPaddedLong, numeric_value) - else: + if isinstance(numeric_value, int): self = int.__new__(BitPaddedInt, numeric_value) + else: + self = long_.__new__(BitPaddedLong, numeric_value) self.bits = bits self.bigendian = bigendian return self -class BitPaddedLong(long, _BitPaddedMixin): +class BitPaddedLong(long_, _BitPaddedMixin): pass diff --git a/lib/mutagen/m4a.py b/lib/mutagen/m4a.py index 64b89679..b8fa9784 100644 --- a/lib/mutagen/m4a.py +++ b/lib/mutagen/m4a.py @@ -4,6 +4,11 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. +import sys + +if sys.version_info[0] != 2: + raise ImportError("No longer available with Python 3, use mutagen.mp4") + """Read and write MPEG-4 audio files with iTunes metadata. 
This module will read MPEG-4 audio information and metadata, @@ -25,12 +30,14 @@ import sys from cStringIO import StringIO -from mutagen import FileType, Metadata +from ._compat import reraise +from mutagen import FileType, Metadata, StreamInfo from mutagen._constants import GENRES -from mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy +from mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy, \ + MutagenError -class error(IOError): +class error(IOError, MutagenError): pass @@ -220,7 +227,7 @@ class M4ATags(DictProxy, Metadata): def load(self, atoms, fileobj): try: ilst = atoms["moov.udta.meta.ilst"] - except KeyError, key: + except KeyError as key: raise M4AMetadataError(key) for atom in ilst.children: fileobj.seek(atom.offset + 8) @@ -451,7 +458,7 @@ class M4ATags(DictProxy, Metadata): return "\n".join(values) -class M4AInfo(object): +class M4AInfo(StreamInfo): """MPEG-4 stream information. Attributes: @@ -510,14 +517,14 @@ class M4A(FileType): atoms = Atoms(fileobj) try: self.info = M4AInfo(atoms, fileobj) - except StandardError, err: - raise M4AStreamInfoError, err, sys.exc_info()[2] + except StandardError as err: + reraise(M4AStreamInfoError, err, sys.exc_info()[2]) try: self.tags = M4ATags(atoms, fileobj) except M4AMetadataError: self.tags = None - except StandardError, err: - raise M4AMetadataError, err, sys.exc_info()[2] + except StandardError as err: + reraise(M4AMetadataError, err, sys.exc_info()[2]) finally: fileobj.close() diff --git a/lib/mutagen/monkeysaudio.py b/lib/mutagen/monkeysaudio.py index 355749b9..536d7b18 100644 --- a/lib/mutagen/monkeysaudio.py +++ b/lib/mutagen/monkeysaudio.py @@ -1,6 +1,6 @@ -# A Monkey's Audio (APE) reader/tagger -# -# Copyright 2006 Lukas Lalinsky +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -18,6 +18,8 @@ __all__ = 
["MonkeysAudio", "Open", "delete"] import struct +from ._compat import endswith +from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete from mutagen._util import cdata @@ -26,7 +28,7 @@ class MonkeysAudioHeaderError(error): pass -class MonkeysAudioInfo(object): +class MonkeysAudioInfo(StreamInfo): """Monkey's Audio stream information. Attributes: @@ -40,7 +42,7 @@ class MonkeysAudioInfo(object): def __init__(self, fileobj): header = fileobj.read(76) - if len(header) != 76 or not header.startswith("MAC "): + if len(header) != 76 or not header.startswith(b"MAC "): raise MonkeysAudioHeaderError("not a Monkey's Audio file") self.version = cdata.ushort_le(header[4:6]) if self.version >= 3980: @@ -62,7 +64,7 @@ class MonkeysAudioInfo(object): blocks_per_frame = 9216 self.version /= 1000.0 self.length = 0.0 - if self.sample_rate != 0 and total_frames > 0: + if (self.sample_rate != 0) and (total_frames > 0): total_blocks = ((total_frames - 1) * blocks_per_frame + final_frame_blocks) self.length = float(total_blocks) / self.sample_rate @@ -78,7 +80,7 @@ class MonkeysAudio(APEv2File): @staticmethod def score(filename, fileobj, header): - return header.startswith("MAC ") + filename.lower().endswith(".ape") + return header.startswith(b"MAC ") + endswith(filename.lower(), ".ape") Open = MonkeysAudio diff --git a/lib/mutagen/mp3.py b/lib/mutagen/mp3.py index 2426610b..535c6b4d 100644 --- a/lib/mutagen/mp3.py +++ b/lib/mutagen/mp3.py @@ -1,5 +1,6 @@ -# MP3 stream header information support for Mutagen. 
-# Copyright 2006 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as @@ -10,12 +11,15 @@ import os import struct +from ._compat import endswith +from mutagen import StreamInfo +from mutagen._util import MutagenError from mutagen.id3 import ID3FileType, BitPaddedInt, delete __all__ = ["MP3", "Open", "delete", "MP3"] -class error(RuntimeError): +class error(RuntimeError, MutagenError): pass @@ -31,7 +35,7 @@ class InvalidMPEGHeader(error, IOError): STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4) -class MPEGInfo(object): +class MPEGInfo(StreamInfo): """MPEG audio stream information Parse information about an MPEG audio file. This also reads the @@ -58,14 +62,15 @@ class MPEGInfo(object): # Map (version, layer) tuples to bitrates. __BITRATE = { - (1, 1): range(0, 480, 32), + (1, 1): [0, 32, 64, 96, 128, 160, 192, 224, + 256, 288, 320, 352, 384, 416, 448], (1, 2): [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384], (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320], (2, 1): [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256], - (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64, + (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160], } @@ -106,7 +111,7 @@ class MPEGInfo(object): except struct.error: id3, insize = '', 0 insize = BitPaddedInt(insize) - if id3 == 'ID3' and insize > 0: + if id3 == b'ID3' and insize > 0: offset = insize + 10 else: offset = 0 @@ -138,11 +143,11 @@ class MPEGInfo(object): # is assuming the offset didn't lie. 
data = fileobj.read(32768) - frame_1 = data.find("\xff") - while 0 <= frame_1 <= len(data) - 4: + frame_1 = data.find(b"\xff") + while 0 <= frame_1 <= (len(data) - 4): frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0] - if (frame_data >> 16) & 0xE0 != 0xE0: - frame_1 = data.find("\xff", frame_1 + 2) + if ((frame_data >> 16) & 0xE0) != 0xE0: + frame_1 = data.find(b"\xff", frame_1 + 2) else: version = (frame_data >> 19) & 0x3 layer = (frame_data >> 17) & 0x3 @@ -150,15 +155,15 @@ class MPEGInfo(object): bitrate = (frame_data >> 12) & 0xF sample_rate = (frame_data >> 10) & 0x3 padding = (frame_data >> 9) & 0x1 - #private = (frame_data >> 8) & 0x1 + # private = (frame_data >> 8) & 0x1 self.mode = (frame_data >> 6) & 0x3 - #mode_extension = (frame_data >> 4) & 0x3 - #copyright = (frame_data >> 3) & 0x1 - #original = (frame_data >> 2) & 0x1 - #emphasis = (frame_data >> 0) & 0x3 + # mode_extension = (frame_data >> 4) & 0x3 + # copyright = (frame_data >> 3) & 0x1 + # original = (frame_data >> 2) & 0x1 + # emphasis = (frame_data >> 0) & 0x3 if (version == 1 or layer == 0 or sample_rate == 0x3 or bitrate == 0 or bitrate == 0xF): - frame_1 = data.find("\xff", frame_1 + 2) + frame_1 = data.find(b"\xff", frame_1 + 2) else: break else: @@ -176,17 +181,18 @@ class MPEGInfo(object): self.sample_rate = self.__RATES[self.version][sample_rate] if self.layer == 1: - frame_length = (12 * self.bitrate / self.sample_rate + padding) * 4 + frame_length = ( + (12 * self.bitrate // self.sample_rate) + padding) * 4 frame_size = 384 elif self.version >= 2 and self.layer == 3: - frame_length = 72 * self.bitrate / self.sample_rate + padding + frame_length = (72 * self.bitrate // self.sample_rate) + padding frame_size = 576 else: - frame_length = 144 * self.bitrate / self.sample_rate + padding + frame_length = (144 * self.bitrate // self.sample_rate) + padding frame_size = 1152 if check_second: - possible = frame_1 + frame_length + possible = int(frame_1 + frame_length) if possible > 
len(data) + 4: raise HeaderNotFoundError("can't sync to second MPEG frame") try: @@ -194,7 +200,7 @@ class MPEGInfo(object): ">H", data[possible:possible + 2])[0] except struct.error: raise HeaderNotFoundError("can't sync to second MPEG frame") - if frame_data & 0xFFE0 != 0xFFE0: + if (frame_data & 0xFFE0) != 0xFFE0: raise HeaderNotFoundError("can't sync to second MPEG frame") self.length = 8 * real_size / float(self.bitrate) @@ -204,12 +210,12 @@ class MPEGInfo(object): fileobj.seek(offset, 0) data = fileobj.read(32768) try: - xing = data[:-4].index("Xing") + xing = data[:-4].index(b"Xing") except ValueError: # Try to find/parse the VBRI header, which trumps the above length # calculation. try: - vbri = data[:-24].index("VBRI") + vbri = data[:-24].index(b"VBRI") except ValueError: pass else: @@ -230,8 +236,9 @@ class MPEGInfo(object): samples = float(frame_size * frame_count) self.length = (samples / self.sample_rate) or self.length if flags & 0x2: - bytes = struct.unpack('>I', data[xing + 12:xing + 16])[0] - self.bitrate = int((bytes * 8) // self.length) + bitrate_data = struct.unpack( + '>I', data[xing + 12:xing + 16])[0] + self.bitrate = int((bitrate_data * 8) // self.length) def pprint(self): s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % ( @@ -250,15 +257,22 @@ class MP3(ID3FileType): """ _Info = MPEGInfo - _mimes = ["audio/mp3", "audio/x-mp3", "audio/mpeg", "audio/mpg", - "audio/x-mpeg"] + + _mimes = ["audio/mpeg", "audio/mpg", "audio/x-mpeg"] + + @property + def mime(self): + l = self.info.layer + return ["audio/mp%d" % l, "audio/x-mp%d" % l] + super(MP3, self).mime @staticmethod - def score(filename, fileobj, header): + def score(filename, fileobj, header_data): filename = filename.lower() - return (header.startswith("ID3") * 2 + filename.endswith(".mp3") + - filename.endswith(".mp2") + filename.endswith(".mpg") + - filename.endswith(".mpeg")) + + return (header_data.startswith(b"ID3") * 2 + + endswith(filename, b".mp3") + + endswith(filename, 
b".mp2") + endswith(filename, b".mpg") + + endswith(filename, b".mpeg")) Open = MP3 diff --git a/lib/mutagen/mp4.py b/lib/mutagen/mp4.py deleted file mode 100644 index 984a38c4..00000000 --- a/lib/mutagen/mp4.py +++ /dev/null @@ -1,822 +0,0 @@ -# Copyright 2006 Joe Wreschnig -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Read and write MPEG-4 audio files with iTunes metadata. - -This module will read MPEG-4 audio information and metadata, -as found in Apple's MP4 (aka M4A, M4B, M4P) files. - -There is no official specification for this format. The source code -for TagLib, FAAD, and various MPEG specifications at - -* http://developer.apple.com/documentation/QuickTime/QTFF/ -* http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt -* http://standards.iso.org/ittf/PubliclyAvailableStandards/\ -c041828_ISO_IEC_14496-12_2005(E).zip -* http://wiki.multimedia.cx/index.php?title=Apple_QuickTime - -were all consulted. -""" - -import struct -import sys - -from mutagen import FileType, Metadata -from mutagen._constants import GENRES -from mutagen._util import cdata, insert_bytes, DictProxy, utf8 - - -class error(IOError): - pass - - -class MP4MetadataError(error): - pass - - -class MP4StreamInfoError(error): - pass - - -class MP4MetadataValueError(ValueError, MP4MetadataError): - pass - - -# This is not an exhaustive list of container atoms, but just the -# ones this module needs to peek inside. -_CONTAINERS = ["moov", "udta", "trak", "mdia", "meta", "ilst", - "stbl", "minf", "moof", "traf"] -_SKIP_SIZE = {"meta": 4} - -__all__ = ['MP4', 'Open', 'delete', 'MP4Cover', 'MP4FreeForm'] - - -class MP4Cover(str): - """A cover artwork. 
- - Attributes: - - * imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG) - """ - FORMAT_JPEG = 0x0D - FORMAT_PNG = 0x0E - - def __new__(cls, data, *args, **kwargs): - return str.__new__(cls, data) - - def __init__(self, data, imageformat=FORMAT_JPEG): - self.imageformat = imageformat - try: - self.format - except AttributeError: - self.format = imageformat - - -class MP4FreeForm(str): - """A freeform value. - - Attributes: - - * dataformat -- format of the data (either FORMAT_TEXT or FORMAT_DATA) - """ - - FORMAT_DATA = 0x0 - FORMAT_TEXT = 0x1 - - def __new__(cls, data, *args, **kwargs): - return str.__new__(cls, data) - - def __init__(self, data, dataformat=FORMAT_TEXT): - self.dataformat = dataformat - - -class Atom(object): - """An individual atom. - - Attributes: - children -- list child atoms (or None for non-container atoms) - length -- length of this atom, including length and name - name -- four byte name of the atom, as a str - offset -- location in the constructor-given fileobj of this atom - - This structure should only be used internally by Mutagen. - """ - - children = None - - def __init__(self, fileobj, level=0): - self.offset = fileobj.tell() - self.length, self.name = struct.unpack(">I4s", fileobj.read(8)) - if self.length == 1: - self.length, = struct.unpack(">Q", fileobj.read(8)) - if self.length < 16: - raise MP4MetadataError( - "64 bit atom length can only be 16 and higher") - elif self.length == 0: - if level != 0: - raise MP4MetadataError( - "only a top-level atom can have zero length") - # Only the last atom is supposed to have a zero-length, meaning it - # extends to the end of file. 
- fileobj.seek(0, 2) - self.length = fileobj.tell() - self.offset - fileobj.seek(self.offset + 8, 0) - elif self.length < 8: - raise MP4MetadataError( - "atom length can only be 0, 1 or 8 and higher") - - if self.name in _CONTAINERS: - self.children = [] - fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1) - while fileobj.tell() < self.offset + self.length: - self.children.append(Atom(fileobj, level + 1)) - else: - fileobj.seek(self.offset + self.length, 0) - - @staticmethod - def render(name, data): - """Render raw atom data.""" - # this raises OverflowError if Py_ssize_t can't handle the atom data - size = len(data) + 8 - if size <= 0xFFFFFFFF: - return struct.pack(">I4s", size, name) + data - else: - return struct.pack(">I4sQ", 1, name, size + 8) + data - - def findall(self, name, recursive=False): - """Recursively find all child atoms by specified name.""" - if self.children is not None: - for child in self.children: - if child.name == name: - yield child - if recursive: - for atom in child.findall(name, True): - yield atom - - def __getitem__(self, remaining): - """Look up a child atom, potentially recursively. - - e.g. atom['udta', 'meta'] => - """ - if not remaining: - return self - elif self.children is None: - raise KeyError("%r is not a container" % self.name) - for child in self.children: - if child.name == remaining[0]: - return child[remaining[1:]] - else: - raise KeyError("%r not found" % remaining[0]) - - def __repr__(self): - klass = self.__class__.__name__ - if self.children is None: - return "<%s name=%r length=%r offset=%r>" % ( - klass, self.name, self.length, self.offset) - else: - children = "\n".join([" " + line for child in self.children - for line in repr(child).splitlines()]) - return "<%s name=%r length=%r offset=%r\n%s>" % ( - klass, self.name, self.length, self.offset, children) - - -class Atoms(object): - """Root atoms in a given file. 
- - Attributes: - atoms -- a list of top-level atoms as Atom objects - - This structure should only be used internally by Mutagen. - """ - - def __init__(self, fileobj): - self.atoms = [] - fileobj.seek(0, 2) - end = fileobj.tell() - fileobj.seek(0) - while fileobj.tell() + 8 <= end: - self.atoms.append(Atom(fileobj)) - - def path(self, *names): - """Look up and return the complete path of an atom. - - For example, atoms.path('moov', 'udta', 'meta') will return a - list of three atoms, corresponding to the moov, udta, and meta - atoms. - """ - - path = [self] - for name in names: - path.append(path[-1][name, ]) - return path[1:] - - def __contains__(self, names): - try: - self[names] - except KeyError: - return False - return True - - def __getitem__(self, names): - """Look up a child atom. - - 'names' may be a list of atoms (['moov', 'udta']) or a string - specifying the complete path ('moov.udta'). - """ - - if isinstance(names, basestring): - names = names.split(".") - for child in self.atoms: - if child.name == names[0]: - return child[names[1:]] - else: - raise KeyError("%s not found" % names[0]) - - def __repr__(self): - return "\n".join([repr(child) for child in self.atoms]) - - -class MP4Tags(DictProxy, Metadata): - r"""Dictionary containing Apple iTunes metadata list key/values. - - Keys are four byte identifiers, except for freeform ('----') - keys. 
Values are usually unicode strings, but some atoms have a - special structure: - - Text values (multiple values per key are supported): - - * '\\xa9nam' -- track title - * '\\xa9alb' -- album - * '\\xa9ART' -- artist - * 'aART' -- album artist - * '\\xa9wrt' -- composer - * '\\xa9day' -- year - * '\\xa9cmt' -- comment - * 'desc' -- description (usually used in podcasts) - * 'purd' -- purchase date - * '\\xa9grp' -- grouping - * '\\xa9gen' -- genre - * '\\xa9lyr' -- lyrics - * 'purl' -- podcast URL - * 'egid' -- podcast episode GUID - * 'catg' -- podcast category - * 'keyw' -- podcast keywords - * '\\xa9too' -- encoded by - * 'cprt' -- copyright - * 'soal' -- album sort order - * 'soaa' -- album artist sort order - * 'soar' -- artist sort order - * 'sonm' -- title sort order - * 'soco' -- composer sort order - * 'sosn' -- show sort order - * 'tvsh' -- show name - - Boolean values: - - * 'cpil' -- part of a compilation - * 'pgap' -- part of a gapless album - * 'pcst' -- podcast (iTunes reads this only on import) - - Tuples of ints (multiple values per key are supported): - - * 'trkn' -- track number, total tracks - * 'disk' -- disc number, total discs - - Others: - - * 'tmpo' -- tempo/BPM, 16 bit int - * 'covr' -- cover artwork, list of MP4Cover objects (which are - tagged strs) - * 'gnre' -- ID3v1 genre. Not supported, use '\\xa9gen' instead. - - The freeform '----' frames use a key in the format '----:mean:name' - where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique - identifier for this frame. The value is a str, but is probably - text that can be decoded as UTF-8. Multiple values per key are - supported. - - MP4 tag data cannot exist outside of the structure of an MP4 file, - so this class should not be manually instantiated. - - Unknown non-text tags are removed. 
- """ - - def load(self, atoms, fileobj): - try: - ilst = atoms["moov.udta.meta.ilst"] - except KeyError, key: - raise MP4MetadataError(key) - for atom in ilst.children: - fileobj.seek(atom.offset + 8) - data = fileobj.read(atom.length - 8) - if len(data) != atom.length - 8: - raise MP4MetadataError("Not enough data") - - if atom.name in self.__atoms: - info = self.__atoms[atom.name] - info[0](self, atom, data, *info[2:]) - else: - # unknown atom, try as text and skip if it fails - # FIXME: keep them somehow - try: - self.__parse_text(atom, data) - except MP4MetadataError: - continue - - @classmethod - def _can_load(cls, atoms): - return "moov.udta.meta.ilst" in atoms - - @staticmethod - def __key_sort(item1, item2): - (key1, v1) = item1 - (key2, v2) = item2 - # iTunes always writes the tags in order of "relevance", try - # to copy it as closely as possible. - order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb", - "\xa9gen", "gnre", "trkn", "disk", - "\xa9day", "cpil", "pgap", "pcst", "tmpo", - "\xa9too", "----", "covr", "\xa9lyr"] - order = dict(zip(order, range(len(order)))) - last = len(order) - # If there's no key-based way to distinguish, order by length. - # If there's still no way, go by string comparison on the - # values, so we at least have something determinstic. - return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or - cmp(len(v1), len(v2)) or cmp(v1, v2)) - - def save(self, filename): - """Save the metadata to the given filename.""" - values = [] - items = self.items() - items.sort(self.__key_sort) - for key, value in items: - info = self.__atoms.get(key[:4], (None, type(self).__render_text)) - try: - values.append(info[1](self, key, value, *info[2:])) - except (TypeError, ValueError), s: - raise MP4MetadataValueError, s, sys.exc_info()[2] - data = Atom.render("ilst", "".join(values)) - - # Find the old atoms. 
- fileobj = open(filename, "rb+") - try: - atoms = Atoms(fileobj) - try: - path = atoms.path("moov", "udta", "meta", "ilst") - except KeyError: - self.__save_new(fileobj, atoms, data) - else: - self.__save_existing(fileobj, atoms, path, data) - finally: - fileobj.close() - - def __pad_ilst(self, data, length=None): - if length is None: - length = ((len(data) + 1023) & ~1023) - len(data) - return Atom.render("free", "\x00" * length) - - def __save_new(self, fileobj, atoms, ilst): - hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9) - meta = Atom.render( - "meta", "\x00\x00\x00\x00" + hdlr + ilst + self.__pad_ilst(ilst)) - try: - path = atoms.path("moov", "udta") - except KeyError: - # moov.udta not found -- create one - path = atoms.path("moov") - meta = Atom.render("udta", meta) - offset = path[-1].offset + 8 - insert_bytes(fileobj, len(meta), offset) - fileobj.seek(offset) - fileobj.write(meta) - self.__update_parents(fileobj, path, len(meta)) - self.__update_offsets(fileobj, atoms, len(meta), offset) - - def __save_existing(self, fileobj, atoms, path, data): - # Replace the old ilst atom. 
- ilst = path.pop() - offset = ilst.offset - length = ilst.length - - # Check for padding "free" atoms - meta = path[-1] - index = meta.children.index(ilst) - try: - prev = meta.children[index-1] - if prev.name == "free": - offset = prev.offset - length += prev.length - except IndexError: - pass - try: - next = meta.children[index+1] - if next.name == "free": - length += next.length - except IndexError: - pass - - delta = len(data) - length - if delta > 0 or (delta < 0 and delta > -8): - data += self.__pad_ilst(data) - delta = len(data) - length - insert_bytes(fileobj, delta, offset) - elif delta < 0: - data += self.__pad_ilst(data, -delta - 8) - delta = 0 - - fileobj.seek(offset) - fileobj.write(data) - self.__update_parents(fileobj, path, delta) - self.__update_offsets(fileobj, atoms, delta, offset) - - def __update_parents(self, fileobj, path, delta): - """Update all parent atoms with the new size.""" - for atom in path: - fileobj.seek(atom.offset) - size = cdata.uint_be(fileobj.read(4)) - if size == 1: # 64bit - # skip name (4B) and read size (8B) - size = cdata.ulonglong_be(fileobj.read(12)[4:]) - fileobj.seek(atom.offset + 8) - fileobj.write(cdata.to_ulonglong_be(size + delta)) - else: # 32bit - fileobj.seek(atom.offset) - fileobj.write(cdata.to_uint_be(size + delta)) - - def __update_offset_table(self, fileobj, fmt, atom, delta, offset): - """Update offset table in the specified atom.""" - if atom.offset > offset: - atom.offset += delta - fileobj.seek(atom.offset + 12) - data = fileobj.read(atom.length - 12) - fmt = fmt % cdata.uint_be(data[:4]) - offsets = struct.unpack(fmt, data[4:]) - offsets = [o + (0, delta)[offset < o] for o in offsets] - fileobj.seek(atom.offset + 16) - fileobj.write(struct.pack(fmt, *offsets)) - - def __update_tfhd(self, fileobj, atom, delta, offset): - if atom.offset > offset: - atom.offset += delta - fileobj.seek(atom.offset + 9) - data = fileobj.read(atom.length - 9) - flags = cdata.uint_be("\x00" + data[:3]) - if flags & 1: - o = 
cdata.ulonglong_be(data[7:15]) - if o > offset: - o += delta - fileobj.seek(atom.offset + 16) - fileobj.write(cdata.to_ulonglong_be(o)) - - def __update_offsets(self, fileobj, atoms, delta, offset): - """Update offset tables in all 'stco' and 'co64' atoms.""" - if delta == 0: - return - moov = atoms["moov"] - for atom in moov.findall('stco', True): - self.__update_offset_table(fileobj, ">%dI", atom, delta, offset) - for atom in moov.findall('co64', True): - self.__update_offset_table(fileobj, ">%dQ", atom, delta, offset) - try: - for atom in atoms["moof"].findall('tfhd', True): - self.__update_tfhd(fileobj, atom, delta, offset) - except KeyError: - pass - - def __parse_data(self, atom, data): - pos = 0 - while pos < atom.length - 8: - length, name, flags = struct.unpack(">I4sI", data[pos:pos+12]) - if name != "data": - raise MP4MetadataError( - "unexpected atom %r inside %r" % (name, atom.name)) - yield flags, data[pos+16:pos+length] - pos += length - - def __render_data(self, key, flags, value): - return Atom.render(key, "".join([ - Atom.render("data", struct.pack(">2I", flags, 0) + data) - for data in value])) - - def __parse_freeform(self, atom, data): - length = cdata.uint_be(data[:4]) - mean = data[12:length] - pos = length - length = cdata.uint_be(data[pos:pos+4]) - name = data[pos+12:pos+length] - pos += length - value = [] - while pos < atom.length - 8: - length, atom_name = struct.unpack(">I4s", data[pos:pos+8]) - if atom_name != "data": - raise MP4MetadataError( - "unexpected atom %r inside %r" % (atom_name, atom.name)) - - version = ord(data[pos+8]) - if version != 0: - raise MP4MetadataError("Unsupported version: %r" % version) - - flags = struct.unpack(">I", "\x00" + data[pos+9:pos+12])[0] - value.append(MP4FreeForm(data[pos+16:pos+length], - dataformat=flags)) - pos += length - if value: - self["%s:%s:%s" % (atom.name, mean, name)] = value - - def __render_freeform(self, key, value): - dummy, mean, name = key.split(":", 2) - mean = 
struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean - name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name - if isinstance(value, basestring): - value = [value] - data = "" - for v in value: - flags = MP4FreeForm.FORMAT_TEXT - if isinstance(v, MP4FreeForm): - flags = v.dataformat - data += struct.pack(">I4s2I", len(v) + 16, "data", flags, 0) - data += v - return Atom.render("----", mean + name + data) - - def __parse_pair(self, atom, data): - self[atom.name] = [struct.unpack(">2H", d[2:6]) for - flags, d in self.__parse_data(atom, data)] - - def __render_pair(self, key, value): - data = [] - for (track, total) in value: - if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: - data.append(struct.pack(">4H", 0, track, total, 0)) - else: - raise MP4MetadataValueError( - "invalid numeric pair %r" % ((track, total),)) - return self.__render_data(key, 0, data) - - def __render_pair_no_trailing(self, key, value): - data = [] - for (track, total) in value: - if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: - data.append(struct.pack(">3H", 0, track, total)) - else: - raise MP4MetadataValueError( - "invalid numeric pair %r" % ((track, total),)) - return self.__render_data(key, 0, data) - - def __parse_genre(self, atom, data): - # Translate to a freeform genre. 
- genre = cdata.short_be(data[16:18]) - if "\xa9gen" not in self: - try: - self["\xa9gen"] = [GENRES[genre - 1]] - except IndexError: - pass - - def __parse_tempo(self, atom, data): - self[atom.name] = [cdata.ushort_be(value[1]) for - value in self.__parse_data(atom, data)] - - def __render_tempo(self, key, value): - try: - if len(value) == 0: - return self.__render_data(key, 0x15, "") - - if min(value) < 0 or max(value) >= 2**16: - raise MP4MetadataValueError( - "invalid 16 bit integers: %r" % value) - except TypeError: - raise MP4MetadataValueError( - "tmpo must be a list of 16 bit integers") - - values = map(cdata.to_ushort_be, value) - return self.__render_data(key, 0x15, values) - - def __parse_bool(self, atom, data): - try: - self[atom.name] = bool(ord(data[16:17])) - except TypeError: - self[atom.name] = False - - def __render_bool(self, key, value): - return self.__render_data(key, 0x15, [chr(bool(value))]) - - def __parse_cover(self, atom, data): - self[atom.name] = [] - pos = 0 - while pos < atom.length - 8: - length, name, imageformat = struct.unpack(">I4sI", - data[pos:pos+12]) - if name != "data": - if name == "name": - pos += length - continue - raise MP4MetadataError( - "unexpected atom %r inside 'covr'" % name) - if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG): - imageformat = MP4Cover.FORMAT_JPEG - cover = MP4Cover(data[pos+16:pos+length], imageformat) - self[atom.name].append(cover) - pos += length - - def __render_cover(self, key, value): - atom_data = [] - for cover in value: - try: - imageformat = cover.imageformat - except AttributeError: - imageformat = MP4Cover.FORMAT_JPEG - atom_data.append(Atom.render( - "data", struct.pack(">2I", imageformat, 0) + cover)) - return Atom.render(key, "".join(atom_data)) - - def __parse_text(self, atom, data, expected_flags=1): - value = [text.decode('utf-8', 'replace') for flags, text - in self.__parse_data(atom, data) - if flags == expected_flags] - if value: - self[atom.name] = value - - 
def __render_text(self, key, value, flags=1): - if isinstance(value, basestring): - value = [value] - return self.__render_data( - key, flags, map(utf8, value)) - - def delete(self, filename): - """Remove the metadata from the given filename.""" - - self.clear() - self.save(filename) - - __atoms = { - "----": (__parse_freeform, __render_freeform), - "trkn": (__parse_pair, __render_pair), - "disk": (__parse_pair, __render_pair_no_trailing), - "gnre": (__parse_genre, None), - "tmpo": (__parse_tempo, __render_tempo), - "cpil": (__parse_bool, __render_bool), - "pgap": (__parse_bool, __render_bool), - "pcst": (__parse_bool, __render_bool), - "covr": (__parse_cover, __render_cover), - "purl": (__parse_text, __render_text, 0), - "egid": (__parse_text, __render_text, 0), - } - - # the text atoms we know about which should make loading fail if parsing - # any of them fails - for name in ["\xa9nam", "\xa9alb", "\xa9ART", "aART", "\xa9wrt", "\xa9day", - "\xa9cmt", "desc", "purd", "\xa9grp", "\xa9gen", "\xa9lyr", - "catg", "keyw", "\xa9too", "cprt", "soal", "soaa", "soar", - "sonm", "soco", "sosn", "tvsh"]: - __atoms[name] = (__parse_text, __render_text) - - def pprint(self): - values = [] - for key, value in self.iteritems(): - key = key.decode('latin1') - if key == "covr": - values.append("%s=%s" % (key, ", ".join( - ["[%d bytes of data]" % len(data) for data in value]))) - elif isinstance(value, list): - values.append("%s=%s" % (key, " / ".join(map(unicode, value)))) - else: - values.append("%s=%s" % (key, value)) - return "\n".join(values) - - -class MP4Info(object): - """MPEG-4 stream information. 
- - Attributes: - - * bitrate -- bitrate in bits per second, as an int - * length -- file length in seconds, as a float - * channels -- number of audio channels - * sample_rate -- audio sampling rate in Hz - * bits_per_sample -- bits per sample - """ - - bitrate = 0 - channels = 0 - sample_rate = 0 - bits_per_sample = 0 - - def __init__(self, atoms, fileobj): - for trak in list(atoms["moov"].findall("trak")): - hdlr = trak["mdia", "hdlr"] - fileobj.seek(hdlr.offset) - data = fileobj.read(hdlr.length) - if data[16:20] == "soun": - break - else: - raise MP4StreamInfoError("track has no audio data") - - mdhd = trak["mdia", "mdhd"] - fileobj.seek(mdhd.offset) - data = fileobj.read(mdhd.length) - if ord(data[8]) == 0: - offset = 20 - fmt = ">2I" - else: - offset = 28 - fmt = ">IQ" - end = offset + struct.calcsize(fmt) - unit, length = struct.unpack(fmt, data[offset:end]) - self.length = float(length) / unit - - try: - atom = trak["mdia", "minf", "stbl", "stsd"] - fileobj.seek(atom.offset) - data = fileobj.read(atom.length) - if data[20:24] == "mp4a": - length = cdata.uint_be(data[16:20]) - (self.channels, self.bits_per_sample, _, - self.sample_rate) = struct.unpack(">3HI", data[40:50]) - # ES descriptor type - if data[56:60] == "esds" and ord(data[64:65]) == 0x03: - pos = 65 - # skip extended descriptor type tag, length, ES ID - # and stream priority - if data[pos:pos+3] == "\x80\x80\x80": - pos += 3 - pos += 4 - # decoder config descriptor type - if ord(data[pos]) == 0x04: - pos += 1 - # skip extended descriptor type tag, length, - # object type ID, stream type, buffer size - # and maximum bitrate - if data[pos:pos+3] == "\x80\x80\x80": - pos += 3 - pos += 10 - # average bitrate - self.bitrate = cdata.uint_be(data[pos:pos+4]) - except (ValueError, KeyError): - # stsd atoms are optional - pass - - def pprint(self): - return "MPEG-4 audio, %.2f seconds, %d bps" % ( - self.length, self.bitrate) - - -class MP4(FileType): - """An MPEG-4 audio file, probably containing AAC. 
- - If more than one track is present in the file, the first is used. - Only audio ('soun') tracks will be read. - - :ivar info: :class:`MP4Info` - :ivar tags: :class:`MP4Tags` - """ - - MP4Tags = MP4Tags - - _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] - - def load(self, filename): - self.filename = filename - fileobj = open(filename, "rb") - try: - atoms = Atoms(fileobj) - - # ftyp is always the first atom in a valid MP4 file - if not atoms.atoms or atoms.atoms[0].name != "ftyp": - raise error("Not a MP4 file") - - try: - self.info = MP4Info(atoms, fileobj) - except StandardError, err: - raise MP4StreamInfoError, err, sys.exc_info()[2] - - if not MP4Tags._can_load(atoms): - self.tags = None - else: - try: - self.tags = self.MP4Tags(atoms, fileobj) - except StandardError, err: - raise MP4MetadataError, err, sys.exc_info()[2] - finally: - fileobj.close() - - def add_tags(self): - if self.tags is None: - self.tags = self.MP4Tags() - else: - raise error("an MP4 tag already exists") - - @staticmethod - def score(filename, fileobj, header): - return ("ftyp" in header) + ("mp4" in header) - - -Open = MP4 - - -def delete(filename): - """Remove tags from a file.""" - - MP4(filename).delete() diff --git a/lib/mutagen/mp4/__init__.py b/lib/mutagen/mp4/__init__.py new file mode 100644 index 00000000..b60ffcb9 --- /dev/null +++ b/lib/mutagen/mp4/__init__.py @@ -0,0 +1,965 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Read and write MPEG-4 audio files with iTunes metadata. + +This module will read MPEG-4 audio information and metadata, +as found in Apple's MP4 (aka M4A, M4B, M4P) files. + +There is no official specification for this format. 
The source code +for TagLib, FAAD, and various MPEG specifications at + +* http://developer.apple.com/documentation/QuickTime/QTFF/ +* http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt +* http://standards.iso.org/ittf/PubliclyAvailableStandards/\ +c041828_ISO_IEC_14496-12_2005(E).zip +* http://wiki.multimedia.cx/index.php?title=Apple_QuickTime + +were all consulted. +""" + +import struct +import sys + +from mutagen import FileType, Metadata, StreamInfo +from mutagen._constants import GENRES +from mutagen._util import (cdata, insert_bytes, DictProxy, MutagenError, + hashable, enum) +from mutagen._compat import (reraise, PY2, string_types, text_type, chr_, + iteritems, PY3, cBytesIO) +from ._atom import Atoms, Atom, AtomError +from ._util import parse_full_atom +from ._as_entry import AudioSampleEntry, ASEntryError + + +class error(IOError, MutagenError): + pass + + +class MP4MetadataError(error): + pass + + +class MP4StreamInfoError(error): + pass + + +class MP4MetadataValueError(ValueError, MP4MetadataError): + pass + + +__all__ = ['MP4', 'Open', 'delete', 'MP4Cover', 'MP4FreeForm', 'AtomDataType'] + + +@enum +class AtomDataType(object): + """Enum for `dataformat` attribute of MP4FreeForm. + + .. 
versionadded:: 1.25 + """ + + IMPLICIT = 0 + """for use with tags for which no type needs to be indicated because + only one type is allowed""" + + UTF8 = 1 + """without any count or null terminator""" + + UTF16 = 2 + """also known as UTF-16BE""" + + SJIS = 3 + """deprecated unless it is needed for special Japanese characters""" + + HTML = 6 + """the HTML file header specifies which HTML version""" + + XML = 7 + """the XML header must identify the DTD or schemas""" + + UUID = 8 + """also known as GUID; stored as 16 bytes in binary (valid as an ID)""" + + ISRC = 9 + """stored as UTF-8 text (valid as an ID)""" + + MI3P = 10 + """stored as UTF-8 text (valid as an ID)""" + + GIF = 12 + """(deprecated) a GIF image""" + + JPEG = 13 + """a JPEG image""" + + PNG = 14 + """PNG image""" + + URL = 15 + """absolute, in UTF-8 characters""" + + DURATION = 16 + """in milliseconds, 32-bit integer""" + + DATETIME = 17 + """in UTC, counting seconds since midnight, January 1, 1904; + 32 or 64-bits""" + + GENRES = 18 + """a list of enumerated values""" + + INTEGER = 21 + """a signed big-endian integer with length one of { 1,2,3,4,8 } bytes""" + + RIAA_PA = 24 + """RIAA parental advisory; { -1=no, 1=yes, 0=unspecified }, + 8-bit ingteger""" + + UPC = 25 + """Universal Product Code, in text UTF-8 format (valid as an ID)""" + + BMP = 27 + """Windows bitmap image""" + + +@hashable +class MP4Cover(bytes): + """A cover artwork. 
+ + Attributes: + + * imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG) + """ + + FORMAT_JPEG = AtomDataType.JPEG + FORMAT_PNG = AtomDataType.PNG + + def __new__(cls, data, *args, **kwargs): + return bytes.__new__(cls, data) + + def __init__(self, data, imageformat=FORMAT_JPEG): + self.imageformat = imageformat + + __hash__ = bytes.__hash__ + + def __eq__(self, other): + if not isinstance(other, MP4Cover): + return NotImplemented + + if not bytes.__eq__(self, other): + return False + + if self.imageformat != other.imageformat: + return False + + return True + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "%s(%r, %r)" % ( + type(self).__name__, bytes(self), + AtomDataType(self.imageformat)) + + +@hashable +class MP4FreeForm(bytes): + """A freeform value. + + Attributes: + + * dataformat -- format of the data (see AtomDataType) + """ + + FORMAT_DATA = AtomDataType.IMPLICIT # deprecated + FORMAT_TEXT = AtomDataType.UTF8 # deprecated + + def __new__(cls, data, *args, **kwargs): + return bytes.__new__(cls, data) + + def __init__(self, data, dataformat=AtomDataType.UTF8, version=0): + self.dataformat = dataformat + self.version = version + + __hash__ = bytes.__hash__ + + def __eq__(self, other): + if not isinstance(other, MP4FreeForm): + return NotImplemented + + if not bytes.__eq__(self, other): + return False + + if self.dataformat != other.dataformat: + return False + + if self.version != other.version: + return False + + return True + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "%s(%r, %r)" % ( + type(self).__name__, bytes(self), + AtomDataType(self.dataformat)) + + + +def _name2key(name): + if PY2: + return name + return name.decode("latin-1") + + +def _key2name(key): + if PY2: + return key + return key.encode("latin-1") + + +class MP4Tags(DictProxy, Metadata): + r"""Dictionary containing Apple iTunes metadata list key/values. 
+ + Keys are four byte identifiers, except for freeform ('----') + keys. Values are usually unicode strings, but some atoms have a + special structure: + + Text values (multiple values per key are supported): + + * '\\xa9nam' -- track title + * '\\xa9alb' -- album + * '\\xa9ART' -- artist + * 'aART' -- album artist + * '\\xa9wrt' -- composer + * '\\xa9day' -- year + * '\\xa9cmt' -- comment + * 'desc' -- description (usually used in podcasts) + * 'purd' -- purchase date + * '\\xa9grp' -- grouping + * '\\xa9gen' -- genre + * '\\xa9lyr' -- lyrics + * 'purl' -- podcast URL + * 'egid' -- podcast episode GUID + * 'catg' -- podcast category + * 'keyw' -- podcast keywords + * '\\xa9too' -- encoded by + * 'cprt' -- copyright + * 'soal' -- album sort order + * 'soaa' -- album artist sort order + * 'soar' -- artist sort order + * 'sonm' -- title sort order + * 'soco' -- composer sort order + * 'sosn' -- show sort order + * 'tvsh' -- show name + + Boolean values: + + * 'cpil' -- part of a compilation + * 'pgap' -- part of a gapless album + * 'pcst' -- podcast (iTunes reads this only on import) + + Tuples of ints (multiple values per key are supported): + + * 'trkn' -- track number, total tracks + * 'disk' -- disc number, total discs + + Others: + + * 'tmpo' -- tempo/BPM, 16 bit int + * 'covr' -- cover artwork, list of MP4Cover objects (which are + tagged strs) + * 'gnre' -- ID3v1 genre. Not supported, use '\\xa9gen' instead. + + The freeform '----' frames use a key in the format '----:mean:name' + where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique + identifier for this frame. The value is a str, but is probably + text that can be decoded as UTF-8. Multiple values per key are + supported. + + MP4 tag data cannot exist outside of the structure of an MP4 file, + so this class should not be manually instantiated. + + Unknown non-text tags and tags that failed to parse will be written + back as is. 
+ """ + + def __init__(self, *args, **kwargs): + self._failed_atoms = {} + super(MP4Tags, self).__init__(*args, **kwargs) + + def load(self, atoms, fileobj): + try: + ilst = atoms[b"moov.udta.meta.ilst"] + except KeyError as key: + raise MP4MetadataError(key) + for atom in ilst.children: + ok, data = atom.read(fileobj) + if not ok: + raise MP4MetadataError("Not enough data") + + try: + if atom.name in self.__atoms: + info = self.__atoms[atom.name] + info[0](self, atom, data) + else: + # unknown atom, try as text + self.__parse_text(atom, data, implicit=False) + except MP4MetadataError: + # parsing failed, save them so we can write them back + key = _name2key(atom.name) + self._failed_atoms.setdefault(key, []).append(data) + + def __setitem__(self, key, value): + if not isinstance(key, str): + raise TypeError("key has to be str") + super(MP4Tags, self).__setitem__(key, value) + + @classmethod + def _can_load(cls, atoms): + return b"moov.udta.meta.ilst" in atoms + + @staticmethod + def __key_sort(item): + (key, v) = item + # iTunes always writes the tags in order of "relevance", try + # to copy it as closely as possible. + order = [b"\xa9nam", b"\xa9ART", b"\xa9wrt", b"\xa9alb", + b"\xa9gen", b"gnre", b"trkn", b"disk", + b"\xa9day", b"cpil", b"pgap", b"pcst", b"tmpo", + b"\xa9too", b"----", b"covr", b"\xa9lyr"] + order = dict(zip(order, range(len(order)))) + last = len(order) + # If there's no key-based way to distinguish, order by length. + # If there's still no way, go by string comparison on the + # values, so we at least have something determinstic. 
+ return (order.get(key[:4], last), len(repr(v)), repr(v)) + + def save(self, filename): + """Save the metadata to the given filename.""" + + values = [] + items = sorted(self.items(), key=self.__key_sort) + for key, value in items: + atom_name = _key2name(key)[:4] + if atom_name in self.__atoms: + render_func = self.__atoms[atom_name][1] + else: + render_func = type(self).__render_text + + try: + values.append(render_func(self, key, value)) + except (TypeError, ValueError) as s: + reraise(MP4MetadataValueError, s, sys.exc_info()[2]) + + for atom_name, failed in iteritems(self._failed_atoms): + # don't write atoms back if we have added a new one with + # the same name, this excludes freeform which can have + # multiple atoms with the same key (most parsers seem to be able + # to handle that) + if atom_name in self: + assert atom_name != b"----" + continue + for data in failed: + values.append(Atom.render(_key2name(atom_name), data)) + + data = Atom.render(b"ilst", b"".join(values)) + + # Find the old atoms. 
+ with open(filename, "rb+") as fileobj: + try: + atoms = Atoms(fileobj) + except AtomError as err: + reraise(error, err, sys.exc_info()[2]) + + try: + path = atoms.path(b"moov", b"udta", b"meta", b"ilst") + except KeyError: + self.__save_new(fileobj, atoms, data) + else: + self.__save_existing(fileobj, atoms, path, data) + + def __pad_ilst(self, data, length=None): + if length is None: + length = ((len(data) + 1023) & ~1023) - len(data) + return Atom.render(b"free", b"\x00" * length) + + def __save_new(self, fileobj, atoms, ilst): + hdlr = Atom.render(b"hdlr", b"\x00" * 8 + b"mdirappl" + b"\x00" * 9) + meta = Atom.render( + b"meta", b"\x00\x00\x00\x00" + hdlr + ilst + self.__pad_ilst(ilst)) + try: + path = atoms.path(b"moov", b"udta") + except KeyError: + # moov.udta not found -- create one + path = atoms.path(b"moov") + meta = Atom.render(b"udta", meta) + offset = path[-1].offset + 8 + insert_bytes(fileobj, len(meta), offset) + fileobj.seek(offset) + fileobj.write(meta) + self.__update_parents(fileobj, path, len(meta)) + self.__update_offsets(fileobj, atoms, len(meta), offset) + + def __save_existing(self, fileobj, atoms, path, data): + # Replace the old ilst atom. 
+ ilst = path.pop() + offset = ilst.offset + length = ilst.length + + # Check for padding "free" atoms + meta = path[-1] + index = meta.children.index(ilst) + try: + prev = meta.children[index - 1] + if prev.name == b"free": + offset = prev.offset + length += prev.length + except IndexError: + pass + try: + next = meta.children[index + 1] + if next.name == b"free": + length += next.length + except IndexError: + pass + + delta = len(data) - length + if delta > 0 or (delta < 0 and delta > -8): + data += self.__pad_ilst(data) + delta = len(data) - length + insert_bytes(fileobj, delta, offset) + elif delta < 0: + data += self.__pad_ilst(data, -delta - 8) + delta = 0 + + fileobj.seek(offset) + fileobj.write(data) + self.__update_parents(fileobj, path, delta) + self.__update_offsets(fileobj, atoms, delta, offset) + + def __update_parents(self, fileobj, path, delta): + """Update all parent atoms with the new size.""" + for atom in path: + fileobj.seek(atom.offset) + size = cdata.uint_be(fileobj.read(4)) + if size == 1: # 64bit + # skip name (4B) and read size (8B) + size = cdata.ulonglong_be(fileobj.read(12)[4:]) + fileobj.seek(atom.offset + 8) + fileobj.write(cdata.to_ulonglong_be(size + delta)) + else: # 32bit + fileobj.seek(atom.offset) + fileobj.write(cdata.to_uint_be(size + delta)) + + def __update_offset_table(self, fileobj, fmt, atom, delta, offset): + """Update offset table in the specified atom.""" + if atom.offset > offset: + atom.offset += delta + fileobj.seek(atom.offset + 12) + data = fileobj.read(atom.length - 12) + fmt = fmt % cdata.uint_be(data[:4]) + offsets = struct.unpack(fmt, data[4:]) + offsets = [o + (0, delta)[offset < o] for o in offsets] + fileobj.seek(atom.offset + 16) + fileobj.write(struct.pack(fmt, *offsets)) + + def __update_tfhd(self, fileobj, atom, delta, offset): + if atom.offset > offset: + atom.offset += delta + fileobj.seek(atom.offset + 9) + data = fileobj.read(atom.length - 9) + flags = cdata.uint_be(b"\x00" + data[:3]) + if flags & 
1: + o = cdata.ulonglong_be(data[7:15]) + if o > offset: + o += delta + fileobj.seek(atom.offset + 16) + fileobj.write(cdata.to_ulonglong_be(o)) + + def __update_offsets(self, fileobj, atoms, delta, offset): + """Update offset tables in all 'stco' and 'co64' atoms.""" + if delta == 0: + return + moov = atoms[b"moov"] + for atom in moov.findall(b'stco', True): + self.__update_offset_table(fileobj, ">%dI", atom, delta, offset) + for atom in moov.findall(b'co64', True): + self.__update_offset_table(fileobj, ">%dQ", atom, delta, offset) + try: + for atom in atoms[b"moof"].findall(b'tfhd', True): + self.__update_tfhd(fileobj, atom, delta, offset) + except KeyError: + pass + + def __parse_data(self, atom, data): + pos = 0 + while pos < atom.length - 8: + head = data[pos:pos + 12] + if len(head) != 12: + raise MP4MetadataError("truncated atom % r" % atom.name) + length, name = struct.unpack(">I4s", head[:8]) + version = ord(head[8:9]) + flags = struct.unpack(">I", b"\x00" + head[9:12])[0] + if name != b"data": + raise MP4MetadataError( + "unexpected atom %r inside %r" % (name, atom.name)) + + chunk = data[pos + 16:pos + length] + if len(chunk) != length - 16: + raise MP4MetadataError("truncated atom % r" % atom.name) + yield version, flags, chunk + pos += length + + def __add(self, key, value, single=False): + assert isinstance(key, str) + + if single: + self[key] = value + else: + self.setdefault(key, []).extend(value) + + def __render_data(self, key, version, flags, value): + return Atom.render(_key2name(key), b"".join([ + Atom.render( + b"data", struct.pack(">2I", version << 24 | flags, 0) + data) + for data in value])) + + def __parse_freeform(self, atom, data): + length = cdata.uint_be(data[:4]) + mean = data[12:length] + pos = length + length = cdata.uint_be(data[pos:pos + 4]) + name = data[pos + 12:pos + length] + pos += length + value = [] + while pos < atom.length - 8: + length, atom_name = struct.unpack(">I4s", data[pos:pos + 8]) + if atom_name != b"data": + 
raise MP4MetadataError( + "unexpected atom %r inside %r" % (atom_name, atom.name)) + + version = ord(data[pos + 8:pos + 8 + 1]) + flags = struct.unpack(">I", b"\x00" + data[pos + 9:pos + 12])[0] + value.append(MP4FreeForm(data[pos + 16:pos + length], + dataformat=flags, version=version)) + pos += length + + key = _name2key(atom.name + b":" + mean + b":" + name) + self.__add(key, value) + + def __render_freeform(self, key, value): + if isinstance(value, bytes): + value = [value] + + dummy, mean, name = _key2name(key).split(b":", 2) + mean = struct.pack(">I4sI", len(mean) + 12, b"mean", 0) + mean + name = struct.pack(">I4sI", len(name) + 12, b"name", 0) + name + + data = b"" + for v in value: + flags = AtomDataType.UTF8 + version = 0 + if isinstance(v, MP4FreeForm): + flags = v.dataformat + version = v.version + + data += struct.pack( + ">I4s2I", len(v) + 16, b"data", version << 24 | flags, 0) + data += v + + return Atom.render(b"----", mean + name + data) + + def __parse_pair(self, atom, data): + key = _name2key(atom.name) + values = [struct.unpack(">2H", d[2:6]) for + version, flags, d in self.__parse_data(atom, data)] + self.__add(key, values) + + def __render_pair(self, key, value): + data = [] + for (track, total) in value: + if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: + data.append(struct.pack(">4H", 0, track, total, 0)) + else: + raise MP4MetadataValueError( + "invalid numeric pair %r" % ((track, total),)) + return self.__render_data(key, 0, AtomDataType.IMPLICIT, data) + + def __render_pair_no_trailing(self, key, value): + data = [] + for (track, total) in value: + if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: + data.append(struct.pack(">3H", 0, track, total)) + else: + raise MP4MetadataValueError( + "invalid numeric pair %r" % ((track, total),)) + return self.__render_data(key, 0, AtomDataType.IMPLICIT, data) + + def __parse_genre(self, atom, data): + values = [] + for version, flags, data in self.__parse_data(atom, data): + # version = 0, flags = 
0 + if len(data) != 2: + raise MP4MetadataValueError("invalid genre") + genre = cdata.short_be(data) + # Translate to a freeform genre. + try: + genre = GENRES[genre - 1] + except IndexError: + # this will make us write it back at least + raise MP4MetadataValueError("unknown genre") + values.append(genre) + key = _name2key(b"\xa9gen") + self.__add(key, values) + + def __parse_tempo(self, atom, data): + values = [] + for version, flags, data in self.__parse_data(atom, data): + # version = 0, flags = 0 or 21 + if len(data) != 2: + raise MP4MetadataValueError("invalid tempo") + values.append(cdata.ushort_be(data)) + key = _name2key(atom.name) + self.__add(key, values) + + def __render_tempo(self, key, value): + try: + if len(value) == 0: + return self.__render_data(key, 0, AtomDataType.INTEGER, b"") + + if (min(value) < 0) or (max(value) >= 2 ** 16): + raise MP4MetadataValueError( + "invalid 16 bit integers: %r" % value) + except TypeError: + raise MP4MetadataValueError( + "tmpo must be a list of 16 bit integers") + + values = [cdata.to_ushort_be(v) for v in value] + return self.__render_data(key, 0, AtomDataType.INTEGER, values) + + def __parse_bool(self, atom, data): + for version, flags, data in self.__parse_data(atom, data): + if len(data) != 1: + raise MP4MetadataValueError("invalid bool") + + value = bool(ord(data)) + key = _name2key(atom.name) + self.__add(key, value, single=True) + + def __render_bool(self, key, value): + return self.__render_data( + key, 0, AtomDataType.INTEGER, [chr_(bool(value))]) + + def __parse_cover(self, atom, data): + values = [] + pos = 0 + while pos < atom.length - 8: + length, name, imageformat = struct.unpack(">I4sI", + data[pos:pos + 12]) + if name != b"data": + if name == b"name": + pos += length + continue + raise MP4MetadataError( + "unexpected atom %r inside 'covr'" % name) + if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG): + # Sometimes AtomDataType.IMPLICIT or simply wrong. 
+ # In all cases it was jpeg, so default to it + imageformat = MP4Cover.FORMAT_JPEG + cover = MP4Cover(data[pos + 16:pos + length], imageformat) + values.append(cover) + pos += length + + key = _name2key(atom.name) + self.__add(key, values) + + def __render_cover(self, key, value): + atom_data = [] + for cover in value: + try: + imageformat = cover.imageformat + except AttributeError: + imageformat = MP4Cover.FORMAT_JPEG + atom_data.append(Atom.render( + b"data", struct.pack(">2I", imageformat, 0) + cover)) + return Atom.render(_key2name(key), b"".join(atom_data)) + + def __parse_text(self, atom, data, implicit=True): + # implicit = False, for parsing unknown atoms only take utf8 ones. + # For known ones we can assume the implicit are utf8 too. + values = [] + for version, flags, atom_data in self.__parse_data(atom, data): + if implicit: + if flags not in (AtomDataType.IMPLICIT, AtomDataType.UTF8): + raise MP4MetadataError( + "Unknown atom type %r for %r" % (flags, atom.name)) + else: + if flags != AtomDataType.UTF8: + raise MP4MetadataError( + "%r is not text, ignore" % atom.name) + + try: + text = atom_data.decode("utf-8") + except UnicodeDecodeError as e: + raise MP4MetadataError("%s: %s" % (atom.name, e)) + + values.append(text) + + key = _name2key(atom.name) + self.__add(key, values) + + def __render_text(self, key, value, flags=AtomDataType.UTF8): + if isinstance(value, string_types): + value = [value] + + encoded = [] + for v in value: + if not isinstance(v, text_type): + if PY3: + raise TypeError("%r not str" % v) + v = v.decode("utf-8") + encoded.append(v.encode("utf-8")) + + return self.__render_data(key, 0, flags, encoded) + + def delete(self, filename): + """Remove the metadata from the given filename.""" + + self._failed_atoms.clear() + self.clear() + self.save(filename) + + __atoms = { + b"----": (__parse_freeform, __render_freeform), + b"trkn": (__parse_pair, __render_pair), + b"disk": (__parse_pair, __render_pair_no_trailing), + b"gnre": 
(__parse_genre, None), + b"tmpo": (__parse_tempo, __render_tempo), + b"cpil": (__parse_bool, __render_bool), + b"pgap": (__parse_bool, __render_bool), + b"pcst": (__parse_bool, __render_bool), + b"covr": (__parse_cover, __render_cover), + b"purl": (__parse_text, __render_text), + b"egid": (__parse_text, __render_text), + } + + # these allow implicit flags and parse as text + for name in [b"\xa9nam", b"\xa9alb", b"\xa9ART", b"aART", b"\xa9wrt", + b"\xa9day", b"\xa9cmt", b"desc", b"purd", b"\xa9grp", + b"\xa9gen", b"\xa9lyr", b"catg", b"keyw", b"\xa9too", + b"cprt", b"soal", b"soaa", b"soar", b"sonm", b"soco", + b"sosn", b"tvsh"]: + __atoms[name] = (__parse_text, __render_text) + + def pprint(self): + values = [] + for key, value in iteritems(self): + if not isinstance(key, text_type): + key = key.decode("latin-1") + if key == "covr": + values.append("%s=%s" % (key, ", ".join( + ["[%d bytes of data]" % len(data) for data in value]))) + elif isinstance(value, list): + values.append("%s=%s" % + (key, " / ".join(map(text_type, value)))) + else: + values.append("%s=%s" % (key, value)) + return "\n".join(values) + + +class MP4Info(StreamInfo): + """MPEG-4 stream information. + + Attributes: + + * bitrate -- bitrate in bits per second, as an int + * length -- file length in seconds, as a float + * channels -- number of audio channels + * sample_rate -- audio sampling rate in Hz + * bits_per_sample -- bits per sample + * codec (string): + * if starting with ``"mp4a"`` uses an mp4a audio codec + (see the codec parameter in rfc6381 for details e.g. ``"mp4a.40.2"``) + * for everything else see a list of possible values at + http://www.mp4ra.org/codecs.html + + e.g. ``"mp4a"``, ``"alac"``, ``"mp4a.40.2"``, ``"ac-3"`` etc. + * codec_description (string): + Name of the codec used (ALAC, AAC LC, AC-3...). Values might change in + the future, use for display purposes only. 
+ """ + + bitrate = 0 + channels = 0 + sample_rate = 0 + bits_per_sample = 0 + codec = u"" + codec_name = u"" + + def __init__(self, atoms, fileobj): + try: + moov = atoms[b"moov"] + except KeyError: + raise MP4StreamInfoError("not a MP4 file") + + for trak in moov.findall(b"trak"): + hdlr = trak[b"mdia", b"hdlr"] + ok, data = hdlr.read(fileobj) + if not ok: + raise MP4StreamInfoError("Not enough data") + if data[8:12] == b"soun": + break + else: + raise MP4StreamInfoError("track has no audio data") + + mdhd = trak[b"mdia", b"mdhd"] + ok, data = mdhd.read(fileobj) + if not ok: + raise MP4StreamInfoError("Not enough data") + + try: + version, flags, data = parse_full_atom(data) + except ValueError as e: + raise MP4StreamInfoError(e) + + if version == 0: + offset = 8 + fmt = ">2I" + elif version == 1: + offset = 16 + fmt = ">IQ" + else: + raise MP4StreamInfoError("Unknown mdhd version %d" % version) + + end = offset + struct.calcsize(fmt) + unit, length = struct.unpack(fmt, data[offset:end]) + try: + self.length = float(length) / unit + except ZeroDivisionError: + self.length = 0 + + try: + atom = trak[b"mdia", b"minf", b"stbl", b"stsd"] + except KeyError: + pass + else: + self._parse_stsd(atom, fileobj) + + def _parse_stsd(self, atom, fileobj): + """Sets channels, bits_per_sample, sample_rate and optionally bitrate. + + Can raise MP4StreamInfoError. 
+ """ + + assert atom.name == b"stsd" + + ok, data = atom.read(fileobj) + if not ok: + raise MP4StreamInfoError("Invalid stsd") + + try: + version, flags, data = parse_full_atom(data) + except ValueError as e: + raise MP4StreamInfoError(e) + + if version != 0: + raise MP4StreamInfoError("Unsupported stsd version") + + try: + num_entries, offset = cdata.uint32_be_from(data, 0) + except cdata.error as e: + raise MP4StreamInfoError(e) + + if num_entries == 0: + return + + # look at the first entry if there is one + entry_fileobj = cBytesIO(data[offset:]) + try: + entry_atom = Atom(entry_fileobj) + except AtomError as e: + raise MP4StreamInfoError(e) + + try: + entry = AudioSampleEntry(entry_atom, entry_fileobj) + except ASEntryError as e: + raise MP4StreamInfoError(e) + else: + self.channels = entry.channels + self.bits_per_sample = entry.sample_size + self.sample_rate = entry.sample_rate + self.bitrate = entry.bitrate + self.codec = entry.codec + self.codec_description = entry.codec_description + + def pprint(self): + return "MPEG-4 audio (%s), %.2f seconds, %d bps" % ( + self.codec_description, self.length, self.bitrate) + + +class MP4(FileType): + """An MPEG-4 audio file, probably containing AAC. + + If more than one track is present in the file, the first is used. + Only audio ('soun') tracks will be read. 
+ + :ivar info: :class:`MP4Info` + :ivar tags: :class:`MP4Tags` + """ + + MP4Tags = MP4Tags + + _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] + + def load(self, filename): + self.filename = filename + with open(filename, "rb") as fileobj: + try: + atoms = Atoms(fileobj) + except AtomError as err: + reraise(error, err, sys.exc_info()[2]) + + try: + self.info = MP4Info(atoms, fileobj) + except error: + raise + except Exception as err: + reraise(MP4StreamInfoError, err, sys.exc_info()[2]) + + if not MP4Tags._can_load(atoms): + self.tags = None + else: + try: + self.tags = self.MP4Tags(atoms, fileobj) + except error: + raise + except Exception as err: + reraise(MP4MetadataError, err, sys.exc_info()[2]) + + def add_tags(self): + if self.tags is None: + self.tags = self.MP4Tags() + else: + raise error("an MP4 tag already exists") + + @staticmethod + def score(filename, fileobj, header_data): + return (b"ftyp" in header_data) + (b"mp4" in header_data) + + +Open = MP4 + + +def delete(filename): + """Remove tags from a file.""" + + MP4(filename).delete() diff --git a/lib/mutagen/mp4/_as_entry.py b/lib/mutagen/mp4/_as_entry.py new file mode 100644 index 00000000..1cc4ff88 --- /dev/null +++ b/lib/mutagen/mp4/_as_entry.py @@ -0,0 +1,541 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2014 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from mutagen._compat import cBytesIO, xrange +from mutagen.aac import ProgramConfigElement +from mutagen._util import BitReader, BitReaderError, cdata, text_type +from ._util import parse_full_atom +from ._atom import Atom, AtomError + + +class ASEntryError(Exception): + pass + + +class AudioSampleEntry(object): + """Parses an AudioSampleEntry atom. + + Private API. 
+ + Attrs: + channels (int): number of channels + sample_size (int): sample size in bits + sample_rate (int): sample rate in Hz + bitrate (int): bits per second (0 means unknown) + codec (string): + audio codec, either 'mp4a[.*][.*]' (rfc6381) or 'alac' + codec_description (string): descriptive codec name e.g. "AAC LC+SBR" + + Can raise ASEntryError. + """ + + channels = 0 + sample_size = 0 + sample_rate = 0 + bitrate = 0 + codec = None + codec_description = None + + def __init__(self, atom, fileobj): + ok, data = atom.read(fileobj) + if not ok: + raise ASEntryError("too short %r atom" % atom.name) + + fileobj = cBytesIO(data) + r = BitReader(fileobj) + + try: + # SampleEntry + r.skip(6 * 8) # reserved + r.skip(2 * 8) # data_ref_index + + # AudioSampleEntry + r.skip(8 * 8) # reserved + self.channels = r.bits(16) + self.sample_size = r.bits(16) + r.skip(2 * 8) # pre_defined + r.skip(2 * 8) # reserved + self.sample_rate = r.bits(32) >> 16 + except BitReaderError as e: + raise ASEntryError(e) + + assert r.is_aligned() + + try: + extra = Atom(fileobj) + except AtomError as e: + raise ASEntryError(e) + + self.codec = atom.name.decode("latin-1") + self.codec_description = None + + if atom.name == b"mp4a" and extra.name == b"esds": + self._parse_esds(extra, fileobj) + elif atom.name == b"alac" and extra.name == b"alac": + self._parse_alac(extra, fileobj) + elif atom.name == b"ac-3" and extra.name == b"dac3": + self._parse_dac3(extra, fileobj) + + if self.codec_description is None: + self.codec_description = self.codec.upper() + + def _parse_dac3(self, atom, fileobj): + # ETSI TS 102 366 + + assert atom.name == b"dac3" + + ok, data = atom.read(fileobj) + if not ok: + raise ASEntryError("truncated %s atom" % atom.name) + fileobj = cBytesIO(data) + r = BitReader(fileobj) + + # sample_rate in AudioSampleEntry covers values in + # fscod2 and not just fscod, so ignore fscod here. 
+ try: + r.skip(2 + 5 + 3) # fscod, bsid, bsmod + acmod = r.bits(3) + lfeon = r.bits(1) + bit_rate_code = r.bits(5) + r.skip(5) # reserved + except BitReaderError as e: + raise ASEntryError(e) + + self.channels = [2, 1, 2, 3, 3, 4, 4, 5][acmod] + lfeon + + try: + self.bitrate = [ + 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, + 224, 256, 320, 384, 448, 512, 576, 640][bit_rate_code] * 1000 + except IndexError: + pass + + def _parse_alac(self, atom, fileobj): + # https://alac.macosforge.org/trac/browser/trunk/ + # ALACMagicCookieDescription.txt + + assert atom.name == b"alac" + + ok, data = atom.read(fileobj) + if not ok: + raise ASEntryError("truncated %s atom" % atom.name) + + try: + version, flags, data = parse_full_atom(data) + except ValueError as e: + raise ASEntryError(e) + + if version != 0: + raise ASEntryError("Unsupported version %d" % version) + + fileobj = cBytesIO(data) + r = BitReader(fileobj) + + try: + # for some files the AudioSampleEntry values default to 44100/2chan + # and the real info is in the alac cookie, so prefer it + r.skip(32) # frameLength + compatibleVersion = r.bits(8) + if compatibleVersion != 0: + return + self.sample_size = r.bits(8) + r.skip(8 + 8 + 8) + self.channels = r.bits(8) + r.skip(16 + 32) + self.bitrate = r.bits(32) + self.sample_rate = r.bits(32) + except BitReaderError as e: + raise ASEntryError(e) + + def _parse_esds(self, esds, fileobj): + assert esds.name == b"esds" + + ok, data = esds.read(fileobj) + if not ok: + raise ASEntryError("truncated %s atom" % esds.name) + + try: + version, flags, data = parse_full_atom(data) + except ValueError as e: + raise ASEntryError(e) + + if version != 0: + raise ASEntryError("Unsupported version %d" % version) + + fileobj = cBytesIO(data) + r = BitReader(fileobj) + + try: + tag = r.bits(8) + if tag != ES_Descriptor.TAG: + raise ASEntryError("unexpected descriptor: %d" % tag) + assert r.is_aligned() + except BitReaderError as e: + raise ASEntryError(e) + + try: + decSpecificInfo = 
ES_Descriptor.parse(fileobj) + except DescriptorError as e: + raise ASEntryError(e) + dec_conf_desc = decSpecificInfo.decConfigDescr + + self.bitrate = dec_conf_desc.avgBitrate + self.codec += dec_conf_desc.codec_param + self.codec_description = dec_conf_desc.codec_desc + + decSpecificInfo = dec_conf_desc.decSpecificInfo + if decSpecificInfo is not None: + if decSpecificInfo.channels != 0: + self.channels = decSpecificInfo.channels + + if decSpecificInfo.sample_rate != 0: + self.sample_rate = decSpecificInfo.sample_rate + + +class DescriptorError(Exception): + pass + + +class BaseDescriptor(object): + + TAG = None + + @classmethod + def _parse_desc_length_file(cls, fileobj): + """May raise ValueError""" + + value = 0 + for i in xrange(4): + try: + b = cdata.uint8(fileobj.read(1)) + except cdata.error as e: + raise ValueError(e) + value = (value << 7) | (b & 0x7f) + if not b >> 7: + break + else: + raise ValueError("invalid descriptor length") + + return value + + @classmethod + def parse(cls, fileobj): + """Returns a parsed instance of the called type. + The file position is right after the descriptor after this returns. 
+ + Raises DescriptorError + """ + + try: + length = cls._parse_desc_length_file(fileobj) + except ValueError as e: + raise DescriptorError(e) + pos = fileobj.tell() + instance = cls(fileobj, length) + left = length - (fileobj.tell() - pos) + if left < 0: + raise DescriptorError("descriptor parsing read too much data") + fileobj.seek(left, 1) + return instance + + +class ES_Descriptor(BaseDescriptor): + + TAG = 0x3 + + def __init__(self, fileobj, length): + """Raises DescriptorError""" + + r = BitReader(fileobj) + try: + self.ES_ID = r.bits(16) + self.streamDependenceFlag = r.bits(1) + self.URL_Flag = r.bits(1) + self.OCRstreamFlag = r.bits(1) + self.streamPriority = r.bits(5) + if self.streamDependenceFlag: + self.dependsOn_ES_ID = r.bits(16) + if self.URL_Flag: + URLlength = r.bits(8) + self.URLstring = r.bytes(URLlength) + if self.OCRstreamFlag: + self.OCR_ES_Id = r.bits(16) + + tag = r.bits(8) + except BitReaderError as e: + raise DescriptorError(e) + + if tag != DecoderConfigDescriptor.TAG: + raise DescriptorError("unexpected DecoderConfigDescrTag %d" % tag) + + assert r.is_aligned() + self.decConfigDescr = DecoderConfigDescriptor.parse(fileobj) + + +class DecoderConfigDescriptor(BaseDescriptor): + + TAG = 0x4 + + decSpecificInfo = None + """A DecoderSpecificInfo, optional""" + + def __init__(self, fileobj, length): + """Raises DescriptorError""" + + r = BitReader(fileobj) + + try: + self.objectTypeIndication = r.bits(8) + self.streamType = r.bits(6) + self.upStream = r.bits(1) + self.reserved = r.bits(1) + self.bufferSizeDB = r.bits(24) + self.maxBitrate = r.bits(32) + self.avgBitrate = r.bits(32) + + if (self.objectTypeIndication, self.streamType) != (0x40, 0x5): + return + + # all from here is optional + if length * 8 == r.get_position(): + return + + tag = r.bits(8) + except BitReaderError as e: + raise DescriptorError(e) + + if tag == DecoderSpecificInfo.TAG: + assert r.is_aligned() + self.decSpecificInfo = DecoderSpecificInfo.parse(fileobj) + + @property 
+ def codec_param(self): + """string""" + + param = u".%X" % self.objectTypeIndication + info = self.decSpecificInfo + if info is not None: + param += u".%d" % info.audioObjectType + return param + + @property + def codec_desc(self): + """string or None""" + + info = self.decSpecificInfo + desc = None + if info is not None: + desc = info.description + return desc + + +class DecoderSpecificInfo(BaseDescriptor): + + TAG = 0x5 + + _TYPE_NAMES = [ + None, "AAC MAIN", "AAC LC", "AAC SSR", "AAC LTP", "SBR", + "AAC scalable", "TwinVQ", "CELP", "HVXC", None, None, "TTSI", + "Main synthetic", "Wavetable synthesis", "General MIDI", + "Algorithmic Synthesis and Audio FX", "ER AAC LC", None, "ER AAC LTP", + "ER AAC scalable", "ER Twin VQ", "ER BSAC", "ER AAC LD", "ER CELP", + "ER HVXC", "ER HILN", "ER Parametric", "SSC", "PS", "MPEG Surround", + None, "Layer-1", "Layer-2", "Layer-3", "DST", "ALS", "SLS", + "SLS non-core", "ER AAC ELD", "SMR Simple", "SMR Main", "USAC", + "SAOC", "LD MPEG Surround", "USAC" + ] + + _FREQS = [ + 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, + 12000, 11025, 8000, 7350, + ] + + @property + def description(self): + """string or None if unknown""" + + name = None + try: + name = self._TYPE_NAMES[self.audioObjectType] + except IndexError: + pass + if name is None: + return + if self.sbrPresentFlag == 1: + name += "+SBR" + if self.psPresentFlag == 1: + name += "+PS" + return text_type(name) + + @property + def sample_rate(self): + """0 means unknown""" + + if self.sbrPresentFlag == 1: + return self.extensionSamplingFrequency + elif self.sbrPresentFlag == 0: + return self.samplingFrequency + else: + # these are all types that support SBR + aot_can_sbr = (1, 2, 3, 4, 6, 17, 19, 20, 22) + if self.audioObjectType not in aot_can_sbr: + return self.samplingFrequency + # there shouldn't be SBR for > 48KHz + if self.samplingFrequency > 24000: + return self.samplingFrequency + # either samplingFrequency or samplingFrequency * 2 + return 0 + + 
@property + def channels(self): + """channel count or 0 for unknown""" + + # from ProgramConfigElement() + if hasattr(self, "pce_channels"): + return self.pce_channels + + conf = getattr( + self, "extensionChannelConfiguration", self.channelConfiguration) + + if conf == 1: + if self.psPresentFlag == -1: + return 0 + elif self.psPresentFlag == 1: + return 2 + else: + return 1 + elif conf == 7: + return 8 + elif conf > 7: + return 0 + else: + return conf + + def _get_audio_object_type(self, r): + """Raises BitReaderError""" + + audioObjectType = r.bits(5) + if audioObjectType == 31: + audioObjectTypeExt = r.bits(6) + audioObjectType = 32 + audioObjectTypeExt + return audioObjectType + + def _get_sampling_freq(self, r): + """Raises BitReaderError""" + + samplingFrequencyIndex = r.bits(4) + if samplingFrequencyIndex == 0xf: + samplingFrequency = r.bits(24) + else: + try: + samplingFrequency = self._FREQS[samplingFrequencyIndex] + except IndexError: + samplingFrequency = 0 + return samplingFrequency + + def __init__(self, fileobj, length): + """Raises DescriptorError""" + + r = BitReader(fileobj) + try: + self._parse(r, length) + except BitReaderError as e: + raise DescriptorError(e) + + def _parse(self, r, length): + """Raises BitReaderError""" + + def bits_left(): + return length * 8 - r.get_position() + + self.audioObjectType = self._get_audio_object_type(r) + self.samplingFrequency = self._get_sampling_freq(r) + self.channelConfiguration = r.bits(4) + + self.sbrPresentFlag = -1 + self.psPresentFlag = -1 + if self.audioObjectType in (5, 29): + self.extensionAudioObjectType = 5 + self.sbrPresentFlag = 1 + if self.audioObjectType == 29: + self.psPresentFlag = 1 + self.extensionSamplingFrequency = self._get_sampling_freq(r) + self.audioObjectType = self._get_audio_object_type(r) + if self.audioObjectType == 22: + self.extensionChannelConfiguration = r.bits(4) + else: + self.extensionAudioObjectType = 0 + + if self.audioObjectType in (1, 2, 3, 4, 6, 7, 17, 19, 20, 21, 
22, 23): + try: + GASpecificConfig(r, self) + except NotImplementedError: + # unsupported, (warn?) + return + else: + # unsupported + return + + if self.audioObjectType in ( + 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 39): + epConfig = r.bits(2) + if epConfig in (2, 3): + # unsupported + return + + if self.extensionAudioObjectType != 5 and bits_left() >= 16: + syncExtensionType = r.bits(11) + if syncExtensionType == 0x2b7: + self.extensionAudioObjectType = self._get_audio_object_type(r) + + if self.extensionAudioObjectType == 5: + self.sbrPresentFlag = r.bits(1) + if self.sbrPresentFlag == 1: + self.extensionSamplingFrequency = \ + self._get_sampling_freq(r) + if bits_left() >= 12: + syncExtensionType = r.bits(11) + if syncExtensionType == 0x548: + self.psPresentFlag = r.bits(1) + + if self.extensionAudioObjectType == 22: + self.sbrPresentFlag = r.bits(1) + if self.sbrPresentFlag == 1: + self.extensionSamplingFrequency = \ + self._get_sampling_freq(r) + self.extensionChannelConfiguration = r.bits(4) + + +def GASpecificConfig(r, info): + """Reads GASpecificConfig which is needed to get the data after that + (there is no length defined to skip it) and to read program_config_element + which can contain channel counts. + + May raise BitReaderError on error or + NotImplementedError if some reserved data was set. 
+ """ + + assert isinstance(info, DecoderSpecificInfo) + + r.skip(1) # frameLengthFlag + dependsOnCoreCoder = r.bits(1) + if dependsOnCoreCoder: + r.skip(14) + extensionFlag = r.bits(1) + if not info.channelConfiguration: + pce = ProgramConfigElement(r) + info.pce_channels = pce.channels + if info.audioObjectType == 6 or info.audioObjectType == 20: + r.skip(3) + if extensionFlag: + if info.audioObjectType == 22: + r.skip(5 + 11) + if info.audioObjectType in (17, 19, 20, 23): + r.skip(1 + 1 + 1) + extensionFlag3 = r.bits(1) + if extensionFlag3 != 0: + raise NotImplementedError("extensionFlag3 set") diff --git a/lib/mutagen/mp4/_atom.py b/lib/mutagen/mp4/_atom.py new file mode 100644 index 00000000..7567fa10 --- /dev/null +++ b/lib/mutagen/mp4/_atom.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import struct + +from mutagen._compat import PY2 + +# This is not an exhaustive list of container atoms, but just the +# ones this module needs to peek inside. +_CONTAINERS = [b"moov", b"udta", b"trak", b"mdia", b"meta", b"ilst", + b"stbl", b"minf", b"moof", b"traf"] +_SKIP_SIZE = {b"meta": 4} + + +class AtomError(Exception): + pass + + +class Atom(object): + """An individual atom. + + Attributes: + children -- list child atoms (or None for non-container atoms) + length -- length of this atom, including length and name + name -- four byte name of the atom, as a str + offset -- location in the constructor-given fileobj of this atom + + This structure should only be used internally by Mutagen. 
+ """ + + children = None + + def __init__(self, fileobj, level=0): + """May raise AtomError""" + + self.offset = fileobj.tell() + try: + self.length, self.name = struct.unpack(">I4s", fileobj.read(8)) + except struct.error: + raise AtomError("truncated data") + self._dataoffset = self.offset + 8 + if self.length == 1: + try: + self.length, = struct.unpack(">Q", fileobj.read(8)) + except struct.error: + raise AtomError("truncated data") + self._dataoffset += 8 + if self.length < 16: + raise AtomError( + "64 bit atom length can only be 16 and higher") + elif self.length == 0: + if level != 0: + raise AtomError( + "only a top-level atom can have zero length") + # Only the last atom is supposed to have a zero-length, meaning it + # extends to the end of file. + fileobj.seek(0, 2) + self.length = fileobj.tell() - self.offset + fileobj.seek(self.offset + 8, 0) + elif self.length < 8: + raise AtomError( + "atom length can only be 0, 1 or 8 and higher") + + if self.name in _CONTAINERS: + self.children = [] + fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1) + while fileobj.tell() < self.offset + self.length: + self.children.append(Atom(fileobj, level + 1)) + else: + fileobj.seek(self.offset + self.length, 0) + + def read(self, fileobj): + """Return if all data could be read and the atom payload""" + + fileobj.seek(self._dataoffset, 0) + length = self.length - (self._dataoffset - self.offset) + data = fileobj.read(length) + return len(data) == length, data + + @staticmethod + def render(name, data): + """Render raw atom data.""" + # this raises OverflowError if Py_ssize_t can't handle the atom data + size = len(data) + 8 + if size <= 0xFFFFFFFF: + return struct.pack(">I4s", size, name) + data + else: + return struct.pack(">I4sQ", 1, name, size + 8) + data + + def findall(self, name, recursive=False): + """Recursively find all child atoms by specified name.""" + if self.children is not None: + for child in self.children: + if child.name == name: + yield child + if recursive: + 
for atom in child.findall(name, True): + yield atom + + def __getitem__(self, remaining): + """Look up a child atom, potentially recursively. + + e.g. atom['udta', 'meta'] => + """ + if not remaining: + return self + elif self.children is None: + raise KeyError("%r is not a container" % self.name) + for child in self.children: + if child.name == remaining[0]: + return child[remaining[1:]] + else: + raise KeyError("%r not found" % remaining[0]) + + def __repr__(self): + cls = self.__class__.__name__ + if self.children is None: + return "<%s name=%r length=%r offset=%r>" % ( + cls, self.name, self.length, self.offset) + else: + children = "\n".join([" " + line for child in self.children + for line in repr(child).splitlines()]) + return "<%s name=%r length=%r offset=%r\n%s>" % ( + cls, self.name, self.length, self.offset, children) + + +class Atoms(object): + """Root atoms in a given file. + + Attributes: + atoms -- a list of top-level atoms as Atom objects + + This structure should only be used internally by Mutagen. + """ + + def __init__(self, fileobj): + self.atoms = [] + fileobj.seek(0, 2) + end = fileobj.tell() + fileobj.seek(0) + while fileobj.tell() + 8 <= end: + self.atoms.append(Atom(fileobj)) + + def path(self, *names): + """Look up and return the complete path of an atom. + + For example, atoms.path('moov', 'udta', 'meta') will return a + list of three atoms, corresponding to the moov, udta, and meta + atoms. + """ + + path = [self] + for name in names: + path.append(path[-1][name, ]) + return path[1:] + + def __contains__(self, names): + try: + self[names] + except KeyError: + return False + return True + + def __getitem__(self, names): + """Look up a child atom. + + 'names' may be a list of atoms (['moov', 'udta']) or a string + specifying the complete path ('moov.udta'). 
+ """ + + if PY2: + if isinstance(names, basestring): + names = names.split(b".") + else: + if isinstance(names, bytes): + names = names.split(b".") + + for child in self.atoms: + if child.name == names[0]: + return child[names[1:]] + else: + raise KeyError("%s not found" % names[0]) + + def __repr__(self): + return "\n".join([repr(child) for child in self.atoms]) diff --git a/lib/mutagen/mp4/_util.py b/lib/mutagen/mp4/_util.py new file mode 100644 index 00000000..9583334a --- /dev/null +++ b/lib/mutagen/mp4/_util.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2014 Christoph Reiter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from mutagen._util import cdata + + +def parse_full_atom(data): + """Some atoms are versioned. Split them up in (version, flags, payload). + Can raise ValueError. + """ + + if len(data) < 4: + raise ValueError("not enough data") + + version = ord(data[0:1]) + flags = cdata.uint_be(b"\x00" + data[1:4]) + return version, flags, data[4:] diff --git a/lib/mutagen/musepack.py b/lib/mutagen/musepack.py index 9804deb3..0dc940f1 100644 --- a/lib/mutagen/musepack.py +++ b/lib/mutagen/musepack.py @@ -1,7 +1,7 @@ -# A Musepack reader/tagger -# -# Copyright 2006 Lukas Lalinsky -# Copyright 2012 Christoph Reiter +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Lukas Lalinsky +# Copyright (C) 2012 Christoph Reiter # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,8 @@ __all__ = ["Musepack", "Open", "delete"] import struct +from ._compat import endswith, xrange +from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete from mutagen.id3 import BitPaddedInt from mutagen._util import cdata @@ -46,8 +48,9 @@ def _parse_sv8_int(fileobj, limit=9): c = fileobj.read(1) if len(c) 
!= 1: raise EOFError - num = (num << 7) | (ord(c) & 0x7F) - if not ord(c) & 0x80: + c = bytearray(c) + num = (num << 7) | (c[0] & 0x7F) + if not c[0] & 0x80: return num, i + 1 if limit > 0: raise ValueError @@ -63,7 +66,7 @@ def _calc_sv8_peak(peak): return (10 ** (peak / (256.0 * 20.0)) / 65535.0) -class MusepackInfo(object): +class MusepackInfo(StreamInfo): """Musepack stream information. Attributes: @@ -91,7 +94,7 @@ class MusepackInfo(object): raise MusepackHeaderError("not a Musepack file") # Skip ID3v2 tags - if header[:3] == "ID3": + if header[:3] == b"ID3": header = fileobj.read(6) if len(header) != 6: raise MusepackHeaderError("not a Musepack file") @@ -101,7 +104,7 @@ class MusepackInfo(object): if len(header) != 4: raise MusepackHeaderError("not a Musepack file") - if header.startswith("MPCK"): + if header.startswith(b"MPCK"): self.__parse_sv8(fileobj) else: self.__parse_sv467(fileobj) @@ -111,29 +114,31 @@ class MusepackInfo(object): self.bitrate = int(round(fileobj.tell() * 8 / self.length)) def __parse_sv8(self, fileobj): - #SV8 http://trac.musepack.net/trac/wiki/SV8Specification + # SV8 http://trac.musepack.net/trac/wiki/SV8Specification key_size = 2 - mandatory_packets = ["SH", "RG"] + mandatory_packets = [b"SH", b"RG"] def check_frame_key(key): - if len(frame_type) != key_size or not 'AA' <= frame_type <= 'ZZ': + if ((len(frame_type) != key_size) or + (not b'AA' <= frame_type <= b'ZZ')): raise MusepackHeaderError("Invalid frame key.") frame_type = fileobj.read(key_size) check_frame_key(frame_type) - while frame_type not in ("AP", "SE") and mandatory_packets: + while frame_type not in (b"AP", b"SE") and mandatory_packets: try: frame_size, slen = _parse_sv8_int(fileobj) except (EOFError, ValueError): raise MusepackHeaderError("Invalid packet size.") data_size = frame_size - key_size - slen + # packets can be at maximum data_size big and are padded with zeros - if frame_type == "SH": + if frame_type == b"SH": mandatory_packets.remove(frame_type) 
self.__parse_stream_header(fileobj, data_size) - elif frame_type == "RG": + elif frame_type == b"RG": mandatory_packets.remove(frame_type) self.__parse_replaygain_packet(fileobj, data_size) else: @@ -143,37 +148,43 @@ class MusepackInfo(object): check_frame_key(frame_type) if mandatory_packets: - raise MusepackHeaderError("Missing mandatory packets: %s." - % ", ".join(mandatory_packets)) + raise MusepackHeaderError("Missing mandatory packets: %s." % + ", ".join(map(repr, mandatory_packets))) self.length = float(self.samples) / self.sample_rate self.bitrate = 0 def __parse_stream_header(self, fileobj, data_size): + # skip CRC fileobj.seek(4, 1) + remaining_size = data_size - 4 + try: - self.version = ord(fileobj.read(1)) + self.version = bytearray(fileobj.read(1))[0] except TypeError: raise MusepackHeaderError("SH packet ended unexpectedly.") + + remaining_size -= 1 + try: samples, l1 = _parse_sv8_int(fileobj) samples_skip, l2 = _parse_sv8_int(fileobj) except (EOFError, ValueError): raise MusepackHeaderError( "SH packet: Invalid sample counts.") - left_size = data_size - 5 - l1 - l2 - if left_size != 2: - raise MusepackHeaderError("Invalid SH packet size.") - data = fileobj.read(left_size) - if len(data) != left_size: - raise MusepackHeaderError("SH packet ended unexpectedly.") - self.sample_rate = RATES[ord(data[-2]) >> 5] - self.channels = (ord(data[-1]) >> 4) + 1 + self.samples = samples - samples_skip + remaining_size -= l1 + l2 + + data = fileobj.read(remaining_size) + if len(data) != remaining_size: + raise MusepackHeaderError("SH packet ended unexpectedly.") + self.sample_rate = RATES[bytearray(data)[0] >> 5] + self.channels = (bytearray(data)[1] >> 4) + 1 def __parse_replaygain_packet(self, fileobj, data_size): data = fileobj.read(data_size) - if data_size != 9: + if data_size < 9: raise MusepackHeaderError("Invalid RG packet size.") if len(data) != data_size: raise MusepackHeaderError("RG packet ended unexpectedly.") @@ -197,8 +208,8 @@ class 
MusepackInfo(object): raise MusepackHeaderError("not a Musepack file") # SV7 - if header.startswith("MP+"): - self.version = ord(header[3]) & 0xF + if header.startswith(b"MP+"): + self.version = bytearray(header)[3] & 0xF if self.version < 7: raise MusepackHeaderError("not a Musepack file") frames = cdata.uint_le(header[4:8]) @@ -250,8 +261,10 @@ class Musepack(APEv2File): @staticmethod def score(filename, fileobj, header): - return (header.startswith("MP+") + header.startswith("MPCK") + - filename.lower().endswith(".mpc")) + filename = filename.lower() + + return (header.startswith(b"MP+") + header.startswith(b"MPCK") + + endswith(filename, b".mpc")) Open = Musepack diff --git a/lib/mutagen/ogg.py b/lib/mutagen/ogg.py index 657eb7f7..99eaf422 100644 --- a/lib/mutagen/ogg.py +++ b/lib/mutagen/ogg.py @@ -1,4 +1,6 @@ -# Copyright 2006 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -18,13 +20,12 @@ import struct import sys import zlib -from cStringIO import StringIO - from mutagen import FileType -from mutagen._util import cdata, insert_bytes, delete_bytes +from mutagen._util import cdata, insert_bytes, delete_bytes, MutagenError +from ._compat import cBytesIO, reraise, chr_ -class error(IOError): +class error(IOError, MutagenError): """Ogg stream parsing errors.""" pass @@ -59,7 +60,7 @@ class OggPage(object): version = 0 __type_flags = 0 - position = 0L + position = 0 serial = 0 sequence = 0 offset = None @@ -78,15 +79,15 @@ class OggPage(object): raise EOFError try: - (oggs, self.version, self.__type_flags, self.position, - self.serial, self.sequence, crc, segments) = struct.unpack( - "<4sBBqIIiB", header) + (oggs, self.version, self.__type_flags, + self.position, self.serial, self.sequence, + crc, segments) = struct.unpack("<4sBBqIIiB", header) except struct.error: raise error("unable to read 
full header; got %r" % header) - if oggs != "OggS": + if oggs != b"OggS": raise error("read %r, expected %r, at 0x%x" % ( - oggs, "OggS", fileobj.tell() - 27)) + oggs, b"OggS", fileobj.tell() - 27)) if self.version != 0: raise error("version %r unsupported" % self.version) @@ -96,7 +97,7 @@ class OggPage(object): lacing_bytes = fileobj.read(segments) if len(lacing_bytes) != segments: raise error("unable to read %r lacing bytes" % segments) - for c in map(ord, lacing_bytes): + for c in bytearray(lacing_bytes): total += c if c < 255: lacings.append(total) @@ -105,8 +106,8 @@ class OggPage(object): lacings.append(total) self.complete = False - self.packets = map(fileobj.read, lacings) - if map(len, self.packets) != lacings: + self.packets = [fileobj.read(l) for l in lacings] + if [len(p) for p in self.packets] != lacings: raise error("unable to read full data") def __eq__(self, other): @@ -134,21 +135,21 @@ class OggPage(object): """ data = [ - struct.pack("<4sBBqIIi", "OggS", self.version, self.__type_flags, + struct.pack("<4sBBqIIi", b"OggS", self.version, self.__type_flags, self.position, self.serial, self.sequence, 0) ] lacing_data = [] for datum in self.packets: quot, rem = divmod(len(datum), 255) - lacing_data.append("\xff" * quot + chr(rem)) - lacing_data = "".join(lacing_data) - if not self.complete and lacing_data.endswith("\x00"): + lacing_data.append(b"\xff" * quot + chr_(rem)) + lacing_data = b"".join(lacing_data) + if not self.complete and lacing_data.endswith(b"\x00"): lacing_data = lacing_data[:-1] - data.append(chr(len(lacing_data))) + data.append(chr_(len(lacing_data))) data.append(lacing_data) data.extend(self.packets) - data = "".join(data) + data = b"".join(data) # Python's CRC is swapped relative to Ogg's needs. 
# crc32 returns uint prior to py2.6 on some platforms, so force uint @@ -196,8 +197,8 @@ class OggPage(object): lambda self, v: self.__set_flag(2, v), doc="This is the last page of a logical bitstream.") - @classmethod - def renumber(klass, fileobj, serial, start): + @staticmethod + def renumber(fileobj, serial, start): """Renumber pages belonging to a specified logical stream. fileobj must be opened with mode r+b or w+b. @@ -235,8 +236,8 @@ class OggPage(object): fileobj.seek(page.offset + page.size, 0) number += 1 - @classmethod - def to_packets(klass, pages, strict=False): + @staticmethod + def to_packets(pages, strict=False): """Construct a list of packet data from a list of Ogg pages. If strict is true, the first page must start a new packet, @@ -253,7 +254,7 @@ class OggPage(object): if not pages[-1].complete: raise ValueError("last packet does not complete") elif pages and pages[0].continued: - packets.append([""]) + packets.append([b""]) for page in pages: if serial != page.serial: @@ -267,13 +268,13 @@ class OggPage(object): packets[-1].append(page.packets[0]) else: packets.append([page.packets[0]]) - packets.extend([[p] for p in page.packets[1:]]) + packets.extend([p] for p in page.packets[1:]) - return ["".join(p) for p in packets] + return [b"".join(p) for p in packets] - @classmethod - def from_packets(klass, packets, sequence=0, - default_size=4096, wiggle_room=2048): + @staticmethod + def from_packets(packets, sequence=0, default_size=4096, + wiggle_room=2048): """Construct a list of Ogg pages from a list of packet data. 
The algorithm will generate pages of approximately @@ -300,7 +301,7 @@ class OggPage(object): page.sequence = sequence for packet in packets: - page.packets.append("") + page.packets.append(b"") while packet: data, packet = packet[:chunk_size], packet[chunk_size:] if page.size < default_size and len(page.packets) < 255: @@ -314,7 +315,7 @@ class OggPage(object): if page.packets[-1]: page.complete = False if len(page.packets) == 1: - page.position = -1L + page.position = -1 else: page.packets.pop(-1) pages.append(page) @@ -325,7 +326,7 @@ class OggPage(object): if len(packet) < wiggle_room: page.packets[-1] += packet - packet = "" + packet = b"" if page.packets: pages.append(page) @@ -333,7 +334,7 @@ class OggPage(object): return pages @classmethod - def replace(klass, fileobj, old_pages, new_pages): + def replace(cls, fileobj, old_pages, new_pages): """Replace old_pages with new_pages within fileobj. old_pages must have come from reading fileobj originally. @@ -359,9 +360,9 @@ class OggPage(object): new_pages[-1].last = old_pages[-1].last new_pages[-1].complete = old_pages[-1].complete if not new_pages[-1].complete and len(new_pages[-1].packets) == 1: - new_pages[-1].position = -1L + new_pages[-1].position = -1 - new_data = "".join(map(klass.write, new_pages)) + new_data = b"".join(cls.write(p) for p in new_pages) # Make room in the file for the new data. delta = len(new_data) @@ -386,10 +387,10 @@ class OggPage(object): fileobj.seek(new_data_end, 0) serial = new_pages[-1].serial sequence = new_pages[-1].sequence + 1 - klass.renumber(fileobj, serial, sequence) + cls.renumber(fileobj, serial, sequence) - @classmethod - def find_last(klass, fileobj, serial): + @staticmethod + def find_last(fileobj, serial): """Find the last page of the stream 'serial'. If the file is not multiplexed this function is fast. If it is, @@ -401,19 +402,19 @@ class OggPage(object): # For non-muxed streams, look at the last page. 
try: - fileobj.seek(-256*256, 2) + fileobj.seek(-256 * 256, 2) except IOError: # The file is less than 64k in length. fileobj.seek(0) data = fileobj.read() try: - index = data.rindex("OggS") + index = data.rindex(b"OggS") except ValueError: raise error("unable to find final Ogg header") - stringobj = StringIO(data[index:]) + bytesobj = cBytesIO(data[index:]) best_page = None try: - page = OggPage(stringobj) + page = OggPage(bytesobj) except error: pass else: @@ -459,10 +460,10 @@ class OggFileType(FileType): self.info = self._Info(fileobj) self.tags = self._Tags(fileobj, self.info) self.info._post_tags(fileobj) - except error, e: - raise self._Error, e, sys.exc_info()[2] + except error as e: + reraise(self._Error, e, sys.exc_info()[2]) except EOFError: - raise self._Error, "no appropriate stream found" + raise self._Error("no appropriate stream found") finally: fileobj.close() @@ -480,10 +481,10 @@ class OggFileType(FileType): try: try: self.tags._inject(fileobj) - except error, e: - raise self._Error, e, sys.exc_info()[2] + except error as e: + reraise(self._Error, e, sys.exc_info()[2]) except EOFError: - raise self._Error, "no appropriate stream found" + raise self._Error("no appropriate stream found") finally: fileobj.close() @@ -499,9 +500,9 @@ class OggFileType(FileType): try: try: self.tags._inject(fileobj) - except error, e: - raise self._Error, e, sys.exc_info()[2] + except error as e: + reraise(self._Error, e, sys.exc_info()[2]) except EOFError: - raise self._Error, "no appropriate stream found" + raise self._Error("no appropriate stream found") finally: fileobj.close() diff --git a/lib/mutagen/oggflac.py b/lib/mutagen/oggflac.py index 14ecec00..507a7f55 100644 --- a/lib/mutagen/oggflac.py +++ b/lib/mutagen/oggflac.py @@ -1,6 +1,6 @@ -# Ogg FLAC support. 
-# -# Copyright 2006 Joe Wreschnig +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,7 +19,7 @@ __all__ = ["OggFLAC", "Open", "delete"] import struct -from cStringIO import StringIO +from ._compat import cBytesIO from mutagen.flac import StreamInfo, VCFLACDict, StrictFileObject from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -54,11 +54,11 @@ class OggFLACStreamInfo(StreamInfo): data = data._fileobj page = OggPage(data) - while not page.packets[0].startswith("\x7FFLAC"): + while not page.packets[0].startswith(b"\x7FFLAC"): page = OggPage(data) major, minor, self.packets, flac = struct.unpack( ">BBH4s", page.packets[0][5:13]) - if flac != "fLaC": + if flac != b"fLaC": raise OggFLACHeaderError("invalid FLAC marker (%r)" % flac) elif (major, minor) != (1, 0): raise OggFLACHeaderError( @@ -66,7 +66,7 @@ class OggFLACStreamInfo(StreamInfo): self.serial = page.serial # Skip over the block header. 
- stringobj = StrictFileObject(StringIO(page.packets[0][17:])) + stringobj = StrictFileObject(cBytesIO(page.packets[0][17:])) super(OggFLACStreamInfo, self).load(stringobj) def _post_tags(self, fileobj): @@ -76,7 +76,7 @@ class OggFLACStreamInfo(StreamInfo): self.length = page.position / float(self.sample_rate) def pprint(self): - return "Ogg " + super(OggFLACStreamInfo, self).pprint() + return u"Ogg " + super(OggFLACStreamInfo, self).pprint() class OggFLACVComment(VCFLACDict): @@ -90,7 +90,7 @@ class OggFLACVComment(VCFLACDict): if page.serial == info.serial: pages.append(page) complete = page.complete or (len(page.packets) > 1) - comment = StringIO(OggPage.to_packets(pages)[0][4:]) + comment = cBytesIO(OggPage.to_packets(pages)[0][4:]) super(OggFLACVComment, self).load(comment, errors=errors) def _inject(self, fileobj): @@ -100,7 +100,7 @@ class OggFLACVComment(VCFLACDict): # second packet - and second page - must be the comment data. fileobj.seek(0) page = OggPage(fileobj) - while not page.packets[0].startswith("\x7FFLAC"): + while not page.packets[0].startswith(b"\x7FFLAC"): page = OggPage(fileobj) first_page = page @@ -117,7 +117,7 @@ class OggFLACVComment(VCFLACDict): # Set the new comment block. 
data = self.write() - data = packets[0][0] + struct.pack(">I", len(data))[-3:] + data + data = packets[0][:1] + struct.pack(">I", len(data))[-3:] + data packets[0] = data new_pages = OggPage.from_packets(packets, old_pages[0].sequence) @@ -134,8 +134,8 @@ class OggFLAC(OggFileType): @staticmethod def score(filename, fileobj, header): - return (header.startswith("OggS") * ( - ("FLAC" in header) + ("fLaC" in header))) + return (header.startswith(b"OggS") * ( + (b"FLAC" in header) + (b"fLaC" in header))) Open = OggFLAC diff --git a/lib/mutagen/oggopus.py b/lib/mutagen/oggopus.py index 6de44391..091dcf43 100644 --- a/lib/mutagen/oggopus.py +++ b/lib/mutagen/oggopus.py @@ -1,4 +1,6 @@ -# Copyright 2012 Christoph Reiter +# -*- coding: utf-8 -*- + +# Copyright (C) 2012, 2013 Christoph Reiter # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -16,6 +18,8 @@ __all__ = ["OggOpus", "Open", "delete"] import struct +from mutagen import StreamInfo +from mutagen._compat import BytesIO from mutagen._vorbis import VCommentDict from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -28,7 +32,7 @@ class OggOpusHeaderError(error): pass -class OggOpusInfo(object): +class OggOpusInfo(StreamInfo): """Ogg Opus stream information. 
Attributes: @@ -41,7 +45,7 @@ class OggOpusInfo(object): def __init__(self, fileobj): page = OggPage(fileobj) - while not page.packets[0].startswith("OpusHead"): + while not page.packets[0].startswith(b"OpusHead"): page = OggPage(fileobj) self.serial = page.serial @@ -56,7 +60,7 @@ class OggOpusInfo(object): self.__pre_skip = pre_skip # only the higher 4 bits change on incombatible changes - major, minor = version >> 4, version & 0xF + major = version >> 4 if major != 0: raise OggOpusHeaderError("version %r unsupported" % major) @@ -65,7 +69,7 @@ class OggOpusInfo(object): self.length = (page.position - self.__pre_skip) / float(48000) def pprint(self): - return "Ogg Opus, %.2f seconds" % (self.length) + return u"Ogg Opus, %.2f seconds" % (self.length) class OggOpusVComment(VCommentDict): @@ -74,8 +78,8 @@ class OggOpusVComment(VCommentDict): def __get_comment_pages(self, fileobj, info): # find the first tags page with the right serial page = OggPage(fileobj) - while info.serial != page.serial or \ - not page.packets[0].startswith("OpusTags"): + while ((info.serial != page.serial) or + not page.packets[0].startswith(b"OpusTags")): page = OggPage(fileobj) # get all comment pages @@ -90,7 +94,16 @@ class OggOpusVComment(VCommentDict): def __init__(self, fileobj, info): pages = self.__get_comment_pages(fileobj, info) data = OggPage.to_packets(pages)[0][8:] # Strip OpusTags - super(OggOpusVComment, self).__init__(data, framing=False) + fileobj = BytesIO(data) + super(OggOpusVComment, self).__init__(fileobj, framing=False) + + # in case the LSB of the first byte after v-comment is 1, preserve the + # following data + padding_flag = fileobj.read(1) + if padding_flag and ord(padding_flag) & 0x1: + self._pad_data = padding_flag + fileobj.read() + else: + self._pad_data = b"" def _inject(self, fileobj): fileobj.seek(0) @@ -98,7 +111,7 @@ class OggOpusVComment(VCommentDict): old_pages = self.__get_comment_pages(fileobj, info) packets = OggPage.to_packets(old_pages) - 
packets[0] = "OpusTags" + self.write(framing=False) + packets[0] = b"OpusTags" + self.write(framing=False) + self._pad_data new_pages = OggPage.from_packets(packets, old_pages[0].sequence) OggPage.replace(fileobj, old_pages, new_pages) @@ -113,7 +126,7 @@ class OggOpus(OggFileType): @staticmethod def score(filename, fileobj, header): - return (header.startswith("OggS") * ("OpusHead" in header)) + return (header.startswith(b"OggS") * (b"OpusHead" in header)) Open = OggOpus diff --git a/lib/mutagen/oggspeex.py b/lib/mutagen/oggspeex.py index 4f208521..0590fa6b 100644 --- a/lib/mutagen/oggspeex.py +++ b/lib/mutagen/oggspeex.py @@ -1,5 +1,5 @@ -# Ogg Speex support. -# +# -*- coding: utf-8 -*- + # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify @@ -19,6 +19,7 @@ http://lists.xiph.org/pipermail/speex-dev/2006-July/004676.html. __all__ = ["OggSpeex", "Open", "delete"] +from mutagen import StreamInfo from mutagen._vorbis import VCommentDict from mutagen.ogg import OggPage, OggFileType, error as OggError from mutagen._util import cdata @@ -32,7 +33,7 @@ class OggSpeexHeaderError(error): pass -class OggSpeexInfo(object): +class OggSpeexInfo(StreamInfo): """Ogg Speex stream information. 
Attributes: @@ -49,7 +50,7 @@ class OggSpeexInfo(object): def __init__(self, fileobj): page = OggPage(fileobj) - while not page.packets[0].startswith("Speex "): + while not page.packets[0].startswith(b"Speex "): page = OggPage(fileobj) if not page.first: raise OggSpeexHeaderError( @@ -64,7 +65,7 @@ class OggSpeexInfo(object): self.length = page.position / float(self.sample_rate) def pprint(self): - return "Ogg Speex, %.2f seconds" % self.length + return u"Ogg Speex, %.2f seconds" % self.length class OggSpeexVComment(VCommentDict): @@ -78,7 +79,7 @@ class OggSpeexVComment(VCommentDict): if page.serial == info.serial: pages.append(page) complete = page.complete or (len(page.packets) > 1) - data = OggPage.to_packets(pages)[0] + "\x01" + data = OggPage.to_packets(pages)[0] + b"\x01" super(OggSpeexVComment, self).__init__(data, framing=False) def _inject(self, fileobj): @@ -89,7 +90,7 @@ class OggSpeexVComment(VCommentDict): # Find the first header page, with the stream info. # Use it to get the serial number. page = OggPage(fileobj) - while not page.packets[0].startswith("Speex "): + while not page.packets[0].startswith(b"Speex "): page = OggPage(fileobj) # Look for the next page with that serial number, it'll start @@ -125,7 +126,7 @@ class OggSpeex(OggFileType): @staticmethod def score(filename, fileobj, header): - return (header.startswith("OggS") * ("Speex " in header)) + return (header.startswith(b"OggS") * (b"Speex " in header)) Open = OggSpeex diff --git a/lib/mutagen/oggtheora.py b/lib/mutagen/oggtheora.py index edf221a7..0542823c 100644 --- a/lib/mutagen/oggtheora.py +++ b/lib/mutagen/oggtheora.py @@ -1,5 +1,5 @@ -# Ogg Theora support. 
-# +# -*- coding: utf-8 -*- + # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify @@ -18,6 +18,7 @@ __all__ = ["OggTheora", "Open", "delete"] import struct +from mutagen import StreamInfo from mutagen._vorbis import VCommentDict from mutagen._util import cdata from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -31,7 +32,7 @@ class OggTheoraHeaderError(error): pass -class OggTheoraInfo(object): +class OggTheoraInfo(StreamInfo): """Ogg Theora stream information. Attributes: @@ -44,7 +45,7 @@ class OggTheoraInfo(object): def __init__(self, fileobj): page = OggPage(fileobj) - while not page.packets[0].startswith("\x80theora"): + while not page.packets[0].startswith(b"\x80theora"): page = OggPage(fileobj) if not page.first: raise OggTheoraHeaderError( @@ -56,7 +57,7 @@ class OggTheoraInfo(object): "found Theora version %d.%d != 3.2" % (vmaj, vmin)) fps_num, fps_den = struct.unpack(">2I", data[22:30]) self.fps = fps_num / float(fps_den) - self.bitrate = cdata.uint_be("\x00" + data[37:40]) + self.bitrate = cdata.uint_be(b"\x00" + data[37:40]) self.granule_shift = (cdata.ushort_be(data[40:42]) >> 5) & 0x1F self.serial = page.serial @@ -83,14 +84,14 @@ class OggTheoraCommentDict(VCommentDict): pages.append(page) complete = page.complete or (len(page.packets) > 1) data = OggPage.to_packets(pages)[0][7:] - super(OggTheoraCommentDict, self).__init__(data + "\x01") + super(OggTheoraCommentDict, self).__init__(data + b"\x01") def _inject(self, fileobj): """Write tag data into the Theora comment packet/page.""" fileobj.seek(0) page = OggPage(fileobj) - while not page.packets[0].startswith("\x81theora"): + while not page.packets[0].startswith(b"\x81theora"): page = OggPage(fileobj) old_pages = [page] @@ -101,7 +102,7 @@ class OggTheoraCommentDict(VCommentDict): packets = OggPage.to_packets(old_pages, strict=False) - packets[0] = "\x81theora" + self.write(framing=False) + packets[0] = b"\x81theora" + 
self.write(framing=False) new_pages = OggPage.from_packets(packets, old_pages[0].sequence) OggPage.replace(fileobj, old_pages, new_pages) @@ -117,8 +118,8 @@ class OggTheora(OggFileType): @staticmethod def score(filename, fileobj, header): - return (header.startswith("OggS") * - (("\x80theora" in header) + ("\x81theora" in header))) + return (header.startswith(b"OggS") * + ((b"\x80theora" in header) + (b"\x81theora" in header)) * 2) Open = OggTheora diff --git a/lib/mutagen/oggvorbis.py b/lib/mutagen/oggvorbis.py index 509fd966..fda58c4f 100644 --- a/lib/mutagen/oggvorbis.py +++ b/lib/mutagen/oggvorbis.py @@ -1,5 +1,5 @@ -# Ogg Vorbis support. -# +# -*- coding: utf-8 -*- + # Copyright 2006 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify @@ -19,6 +19,7 @@ __all__ = ["OggVorbis", "Open", "delete"] import struct +from mutagen import StreamInfo from mutagen._vorbis import VCommentDict from mutagen.ogg import OggPage, OggFileType, error as OggError @@ -31,7 +32,7 @@ class OggVorbisHeaderError(error): pass -class OggVorbisInfo(object): +class OggVorbisInfo(StreamInfo): """Ogg Vorbis stream information. Attributes: @@ -44,7 +45,7 @@ class OggVorbisInfo(object): def __init__(self, fileobj): page = OggPage(fileobj) - while not page.packets[0].startswith("\x01vorbis"): + while not page.packets[0].startswith(b"\x01vorbis"): page = OggPage(fileobj) if not page.first: raise OggVorbisHeaderError( @@ -73,7 +74,8 @@ class OggVorbisInfo(object): self.length = page.position / float(self.sample_rate) def pprint(self): - return "Ogg Vorbis, %.2f seconds, %d bps" % (self.length, self.bitrate) + return u"Ogg Vorbis, %.2f seconds, %d bps" % ( + self.length, self.bitrate) class OggVCommentDict(VCommentDict): @@ -97,7 +99,7 @@ class OggVCommentDict(VCommentDict): # plus grab any stray setup packet data out of them. 
fileobj.seek(0) page = OggPage(fileobj) - while not page.packets[0].startswith("\x03vorbis"): + while not page.packets[0].startswith(b"\x03vorbis"): page = OggPage(fileobj) old_pages = [page] @@ -109,7 +111,7 @@ class OggVCommentDict(VCommentDict): packets = OggPage.to_packets(old_pages, strict=False) # Set the new comment packet. - packets[0] = "\x03vorbis" + self.write() + packets[0] = b"\x03vorbis" + self.write() new_pages = OggPage.from_packets(packets, old_pages[0].sequence) OggPage.replace(fileobj, old_pages, new_pages) @@ -125,7 +127,7 @@ class OggVorbis(OggFileType): @staticmethod def score(filename, fileobj, header): - return (header.startswith("OggS") * ("\x01vorbis" in header)) + return (header.startswith(b"OggS") * (b"\x01vorbis" in header)) Open = OggVorbis diff --git a/lib/mutagen/optimfrog.py b/lib/mutagen/optimfrog.py index 24a87af8..3b6a70d8 100644 --- a/lib/mutagen/optimfrog.py +++ b/lib/mutagen/optimfrog.py @@ -1,6 +1,6 @@ -# OptimFROG reader/tagger -# -# Copyright 2006 Lukas Lalinsky +# -*- coding: utf-8 -*- + +# Copyright (C) 2006 Lukas Lalinsky # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -22,6 +22,8 @@ __all__ = ["OptimFROG", "Open", "delete"] import struct +from ._compat import endswith +from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete @@ -29,7 +31,7 @@ class OptimFROGHeaderError(error): pass -class OptimFROGInfo(object): +class OptimFROGInfo(StreamInfo): """OptimFROG stream information. 
Attributes: @@ -41,7 +43,7 @@ class OptimFROGInfo(object): def __init__(self, fileobj): header = fileobj.read(76) - if (len(header) != 76 or not header.startswith("OFR ") or + if (len(header) != 76 or not header.startswith(b"OFR ") or struct.unpack("> 23) & 0xF] + + self.version = header.version + self.channels = bool(header.flags & 4) or 2 + self.sample_rate = RATES[(header.flags >> 23) & 0xF] + + if header.total_samples == -1 or header.block_index != 0: + # TODO: we could make this faster by using the tag size + # and search backwards for the last block, then do + # last.block_index + last.block_samples - initial.block_index + samples = header.block_samples + while 1: + fileobj.seek(header.block_size - 32 + 8, 1) + try: + header = _WavPackHeader.from_fileobj(fileobj) + except WavPackHeaderError: + break + samples += header.block_samples + else: + samples = header.total_samples + self.length = float(samples) / self.sample_rate def pprint(self): @@ -57,7 +118,7 @@ class WavPack(APEv2File): @staticmethod def score(filename, fileobj, header): - return header.startswith("wvpk") * 2 + return header.startswith(b"wvpk") * 2 Open = WavPack