From f03b82e5f6f4a4bd271aa84e55253851a93438e3 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Mon, 7 Feb 2022 09:30:17 +0530 Subject: [PATCH] Update mutagen to 1.45.1 --- lib/mutagen/__init__.py | 2 +- lib/mutagen/_compat.py | 86 ----- lib/mutagen/_constants.py | 384 +++++++++---------- lib/mutagen/_file.py | 28 +- lib/mutagen/_iff.py | 387 +++++++++++++++++++ lib/mutagen/_riff.py | 70 ++++ lib/mutagen/_senf/README.rst | 0 lib/mutagen/_senf/__init__.py | 27 +- lib/mutagen/_senf/__init__.pyi | 104 +++++ lib/mutagen/_senf/_argv.py | 19 +- lib/mutagen/_senf/_compat.py | 31 +- lib/mutagen/_senf/_environ.py | 27 +- lib/mutagen/_senf/_fsnative.py | 205 +++++----- lib/mutagen/_senf/_print.py | 85 ++++- lib/mutagen/_senf/_stdlib.py | 14 +- lib/mutagen/_senf/_temp.py | 12 +- lib/mutagen/_senf/_winansi.py | 14 +- lib/mutagen/_senf/_winapi.py | 298 ++++++++------- lib/mutagen/_senf/py.typed | 0 lib/mutagen/_tags.py | 4 +- lib/mutagen/_tools/__init__.py | 0 lib/mutagen/_tools/_util.py | 4 +- lib/mutagen/_tools/mid3cp.py | 15 +- lib/mutagen/_tools/mid3iconv.py | 17 +- lib/mutagen/_tools/mid3v2.py | 119 +++--- lib/mutagen/_tools/moggsplit.py | 42 +-- lib/mutagen/_tools/mutagen_inspect.py | 11 +- lib/mutagen/_tools/mutagen_pony.py | 4 +- lib/mutagen/_util.py | 238 +++++------- lib/mutagen/_vorbis.py | 51 +-- lib/mutagen/aac.py | 10 +- lib/mutagen/ac3.py | 330 ++++++++++++++++ lib/mutagen/aiff.py | 286 ++++---------- lib/mutagen/apev2.py | 116 ++---- lib/mutagen/asf/__init__.py | 34 +- lib/mutagen/asf/_attrs.py | 39 +- lib/mutagen/asf/_objects.py | 41 +- lib/mutagen/asf/_util.py | 522 +++++++++++++------------- lib/mutagen/dsdiff.py | 267 +++++++++++++ lib/mutagen/dsf.py | 24 +- lib/mutagen/easyid3.py | 73 ++-- lib/mutagen/easymp4.py | 44 +-- lib/mutagen/flac.py | 73 ++-- lib/mutagen/id3/__init__.py | 36 +- lib/mutagen/id3/_file.py | 47 ++- lib/mutagen/id3/_frames.py | 115 +++--- lib/mutagen/id3/_id3v1.py | 92 +++-- lib/mutagen/id3/_specs.py | 67 ++-- lib/mutagen/id3/_tags.py | 92 
+++-- lib/mutagen/id3/_util.py | 16 +- lib/mutagen/m4a.py | 4 +- lib/mutagen/monkeysaudio.py | 8 +- lib/mutagen/mp3/__init__.py | 40 +- lib/mutagen/mp3/_util.py | 94 ++--- lib/mutagen/mp4/__init__.py | 233 ++++++++++-- lib/mutagen/mp4/_as_entry.py | 28 +- lib/mutagen/mp4/_atom.py | 9 +- lib/mutagen/mp4/_util.py | 0 lib/mutagen/musepack.py | 23 +- lib/mutagen/ogg.py | 60 +-- lib/mutagen/oggflac.py | 13 +- lib/mutagen/oggopus.py | 6 +- lib/mutagen/oggspeex.py | 4 +- lib/mutagen/oggtheora.py | 20 +- lib/mutagen/oggvorbis.py | 15 +- lib/mutagen/optimfrog.py | 32 +- lib/mutagen/smf.py | 9 +- lib/mutagen/tak.py | 238 ++++++++++++ lib/mutagen/trueaudio.py | 7 +- lib/mutagen/wave.py | 210 +++++++++++ lib/mutagen/wavpack.py | 22 +- 71 files changed, 3744 insertions(+), 1953 deletions(-) mode change 100755 => 100644 lib/mutagen/__init__.py delete mode 100755 lib/mutagen/_compat.py mode change 100755 => 100644 lib/mutagen/_constants.py mode change 100755 => 100644 lib/mutagen/_file.py create mode 100644 lib/mutagen/_iff.py create mode 100644 lib/mutagen/_riff.py mode change 100755 => 100644 lib/mutagen/_senf/README.rst mode change 100755 => 100644 lib/mutagen/_senf/__init__.py create mode 100644 lib/mutagen/_senf/__init__.pyi mode change 100755 => 100644 lib/mutagen/_senf/_argv.py mode change 100755 => 100644 lib/mutagen/_senf/_compat.py mode change 100755 => 100644 lib/mutagen/_senf/_environ.py mode change 100755 => 100644 lib/mutagen/_senf/_fsnative.py mode change 100755 => 100644 lib/mutagen/_senf/_print.py mode change 100755 => 100644 lib/mutagen/_senf/_stdlib.py mode change 100755 => 100644 lib/mutagen/_senf/_temp.py mode change 100755 => 100644 lib/mutagen/_senf/_winansi.py mode change 100755 => 100644 lib/mutagen/_senf/_winapi.py create mode 100644 lib/mutagen/_senf/py.typed mode change 100755 => 100644 lib/mutagen/_tags.py mode change 100755 => 100644 lib/mutagen/_tools/__init__.py mode change 100755 => 100644 lib/mutagen/_tools/_util.py mode change 100755 => 100644 
lib/mutagen/_tools/mid3cp.py mode change 100755 => 100644 lib/mutagen/_tools/mid3iconv.py mode change 100755 => 100644 lib/mutagen/_tools/mid3v2.py mode change 100755 => 100644 lib/mutagen/_tools/moggsplit.py mode change 100755 => 100644 lib/mutagen/_tools/mutagen_inspect.py mode change 100755 => 100644 lib/mutagen/_tools/mutagen_pony.py mode change 100755 => 100644 lib/mutagen/_util.py mode change 100755 => 100644 lib/mutagen/_vorbis.py mode change 100755 => 100644 lib/mutagen/aac.py create mode 100644 lib/mutagen/ac3.py mode change 100755 => 100644 lib/mutagen/aiff.py mode change 100755 => 100644 lib/mutagen/apev2.py mode change 100755 => 100644 lib/mutagen/asf/__init__.py mode change 100755 => 100644 lib/mutagen/asf/_attrs.py mode change 100755 => 100644 lib/mutagen/asf/_objects.py mode change 100755 => 100644 lib/mutagen/asf/_util.py create mode 100644 lib/mutagen/dsdiff.py mode change 100755 => 100644 lib/mutagen/dsf.py mode change 100755 => 100644 lib/mutagen/easyid3.py mode change 100755 => 100644 lib/mutagen/easymp4.py mode change 100755 => 100644 lib/mutagen/flac.py mode change 100755 => 100644 lib/mutagen/id3/__init__.py mode change 100755 => 100644 lib/mutagen/id3/_file.py mode change 100755 => 100644 lib/mutagen/id3/_frames.py mode change 100755 => 100644 lib/mutagen/id3/_id3v1.py mode change 100755 => 100644 lib/mutagen/id3/_specs.py mode change 100755 => 100644 lib/mutagen/id3/_tags.py mode change 100755 => 100644 lib/mutagen/id3/_util.py mode change 100755 => 100644 lib/mutagen/m4a.py mode change 100755 => 100644 lib/mutagen/monkeysaudio.py mode change 100755 => 100644 lib/mutagen/mp3/__init__.py mode change 100755 => 100644 lib/mutagen/mp3/_util.py mode change 100755 => 100644 lib/mutagen/mp4/__init__.py mode change 100755 => 100644 lib/mutagen/mp4/_as_entry.py mode change 100755 => 100644 lib/mutagen/mp4/_atom.py mode change 100755 => 100644 lib/mutagen/mp4/_util.py mode change 100755 => 100644 lib/mutagen/musepack.py mode change 100755 => 100644 
lib/mutagen/ogg.py mode change 100755 => 100644 lib/mutagen/oggflac.py mode change 100755 => 100644 lib/mutagen/oggopus.py mode change 100755 => 100644 lib/mutagen/oggspeex.py mode change 100755 => 100644 lib/mutagen/oggtheora.py mode change 100755 => 100644 lib/mutagen/oggvorbis.py mode change 100755 => 100644 lib/mutagen/optimfrog.py mode change 100755 => 100644 lib/mutagen/smf.py create mode 100644 lib/mutagen/tak.py mode change 100755 => 100644 lib/mutagen/trueaudio.py create mode 100644 lib/mutagen/wave.py mode change 100755 => 100644 lib/mutagen/wavpack.py diff --git a/lib/mutagen/__init__.py b/lib/mutagen/__init__.py old mode 100755 new mode 100644 index 4603fe6e..6e2a48b0 --- a/lib/mutagen/__init__.py +++ b/lib/mutagen/__init__.py @@ -23,7 +23,7 @@ from mutagen._util import MutagenError from mutagen._file import FileType, StreamInfo, File from mutagen._tags import Tags, Metadata, PaddingInfo -version = (1, 38, -1) +version = (1, 45, 1) """Version tuple.""" version_string = ".".join(map(str, version)) diff --git a/lib/mutagen/_compat.py b/lib/mutagen/_compat.py deleted file mode 100755 index 167670ad..00000000 --- a/lib/mutagen/_compat.py +++ /dev/null @@ -1,86 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2013 Christoph Reiter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
- -import sys - - -PY2 = sys.version_info[0] == 2 -PY3 = not PY2 - -if PY2: - from io import StringIO - BytesIO = StringIO - from io import StringIO as cBytesIO - - - long_ = int - integer_types = (int, int) - string_types = (str, str) - text_type = str - - xrange = xrange - cmp = cmp - chr_ = chr - - def endswith(text, end): - return text.endswith(end) - - iteritems = lambda d: iter(d.items()) - itervalues = lambda d: iter(d.values()) - iterkeys = lambda d: iter(d.keys()) - - iterbytes = lambda b: iter(b) - - exec("def reraise(tp, value, tb):\n raise tp, value, tb") - - def swap_to_string(cls): - if "__str__" in cls.__dict__: - cls.__unicode__ = cls.__str__ - - if "__bytes__" in cls.__dict__: - cls.__str__ = cls.__bytes__ - - return cls - -elif PY3: - from io import StringIO - StringIO = StringIO - from io import BytesIO - cBytesIO = BytesIO - - long_ = int - integer_types = (int,) - string_types = (str,) - text_type = str - - izip = zip - xrange = range - cmp = lambda a, b: (a > b) - (a < b) - chr_ = lambda x: bytes([x]) - - def endswith(text, end): - # usefull for paths which can be both, str and bytes - if isinstance(text, str): - if not isinstance(end, str): - end = end.decode("ascii") - else: - if not isinstance(end, bytes): - end = end.encode("ascii") - return text.endswith(end) - - iteritems = lambda d: iter(list(d.items())) - itervalues = lambda d: iter(list(d.values())) - iterkeys = lambda d: iter(list(d.keys())) - - iterbytes = lambda b: (bytes([v]) for v in b) - - def reraise(tp, value, tb): - raise tp(value).with_traceback(tb) - - def swap_to_string(cls): - return cls diff --git a/lib/mutagen/_constants.py b/lib/mutagen/_constants.py old mode 100755 new mode 100644 index a866b6f8..5c1c1a10 --- a/lib/mutagen/_constants.py +++ b/lib/mutagen/_constants.py @@ -8,197 +8,197 @@ """Constants used by Mutagen.""" GENRES = [ - "Blues", - "Classic Rock", - "Country", - "Dance", - "Disco", - "Funk", - "Grunge", - "Hip-Hop", - "Jazz", - "Metal", - "New Age", - 
"Oldies", - "Other", - "Pop", - "R&B", - "Rap", - "Reggae", - "Rock", - "Techno", - "Industrial", - "Alternative", - "Ska", - "Death Metal", - "Pranks", - "Soundtrack", - "Euro-Techno", - "Ambient", - "Trip-Hop", - "Vocal", - "Jazz+Funk", - "Fusion", - "Trance", - "Classical", - "Instrumental", - "Acid", - "House", - "Game", - "Sound Clip", - "Gospel", - "Noise", - "Alt. Rock", - "Bass", - "Soul", - "Punk", - "Space", - "Meditative", - "Instrumental Pop", - "Instrumental Rock", - "Ethnic", - "Gothic", - "Darkwave", - "Techno-Industrial", - "Electronic", - "Pop-Folk", - "Eurodance", - "Dream", - "Southern Rock", - "Comedy", - "Cult", - "Gangsta Rap", - "Top 40", - "Christian Rap", - "Pop/Funk", - "Jungle", - "Native American", - "Cabaret", - "New Wave", - "Psychedelic", - "Rave", - "Showtunes", - "Trailer", - "Lo-Fi", - "Tribal", - "Acid Punk", - "Acid Jazz", - "Polka", - "Retro", - "Musical", - "Rock & Roll", - "Hard Rock", - "Folk", - "Folk-Rock", - "National Folk", - "Swing", - "Fast-Fusion", - "Bebop", - "Latin", - "Revival", - "Celtic", - "Bluegrass", - "Avantgarde", - "Gothic Rock", - "Progressive Rock", - "Psychedelic Rock", - "Symphonic Rock", - "Slow Rock", - "Big Band", - "Chorus", - "Easy Listening", - "Acoustic", - "Humour", - "Speech", - "Chanson", - "Opera", - "Chamber Music", - "Sonata", - "Symphony", - "Booty Bass", - "Primus", - "Porn Groove", - "Satire", - "Slow Jam", - "Club", - "Tango", - "Samba", - "Folklore", - "Ballad", - "Power Ballad", - "Rhythmic Soul", - "Freestyle", - "Duet", - "Punk Rock", - "Drum Solo", - "A Cappella", - "Euro-House", - "Dance Hall", - "Goa", - "Drum & Bass", - "Club-House", - "Hardcore", - "Terror", - "Indie", - "BritPop", - "Afro-Punk", - "Polsk Punk", - "Beat", - "Christian Gangsta Rap", - "Heavy Metal", - "Black Metal", - "Crossover", - "Contemporary Christian", - "Christian Rock", - "Merengue", - "Salsa", - "Thrash Metal", - "Anime", - "JPop", - "Synthpop", - "Abstract", - "Art Rock", - "Baroque", - "Bhangra", - 
"Big Beat", - "Breakbeat", - "Chillout", - "Downtempo", - "Dub", - "EBM", - "Eclectic", - "Electro", - "Electroclash", - "Emo", - "Experimental", - "Garage", - "Global", - "IDM", - "Illbient", - "Industro-Goth", - "Jam Band", - "Krautrock", - "Leftfield", - "Lounge", - "Math Rock", - "New Romantic", - "Nu-Breakz", - "Post-Punk", - "Post-Rock", - "Psytrance", - "Shoegaze", - "Space Rock", - "Trop Rock", - "World Music", - "Neoclassical", - "Audiobook", - "Audio Theatre", - "Neue Deutsche Welle", - "Podcast", - "Indie Rock", - "G-Funk", - "Dubstep", - "Garage Rock", - "Psybient", + u"Blues", + u"Classic Rock", + u"Country", + u"Dance", + u"Disco", + u"Funk", + u"Grunge", + u"Hip-Hop", + u"Jazz", + u"Metal", + u"New Age", + u"Oldies", + u"Other", + u"Pop", + u"R&B", + u"Rap", + u"Reggae", + u"Rock", + u"Techno", + u"Industrial", + u"Alternative", + u"Ska", + u"Death Metal", + u"Pranks", + u"Soundtrack", + u"Euro-Techno", + u"Ambient", + u"Trip-Hop", + u"Vocal", + u"Jazz+Funk", + u"Fusion", + u"Trance", + u"Classical", + u"Instrumental", + u"Acid", + u"House", + u"Game", + u"Sound Clip", + u"Gospel", + u"Noise", + u"Alt. 
Rock", + u"Bass", + u"Soul", + u"Punk", + u"Space", + u"Meditative", + u"Instrumental Pop", + u"Instrumental Rock", + u"Ethnic", + u"Gothic", + u"Darkwave", + u"Techno-Industrial", + u"Electronic", + u"Pop-Folk", + u"Eurodance", + u"Dream", + u"Southern Rock", + u"Comedy", + u"Cult", + u"Gangsta Rap", + u"Top 40", + u"Christian Rap", + u"Pop/Funk", + u"Jungle", + u"Native American", + u"Cabaret", + u"New Wave", + u"Psychedelic", + u"Rave", + u"Showtunes", + u"Trailer", + u"Lo-Fi", + u"Tribal", + u"Acid Punk", + u"Acid Jazz", + u"Polka", + u"Retro", + u"Musical", + u"Rock & Roll", + u"Hard Rock", + u"Folk", + u"Folk-Rock", + u"National Folk", + u"Swing", + u"Fast-Fusion", + u"Bebop", + u"Latin", + u"Revival", + u"Celtic", + u"Bluegrass", + u"Avantgarde", + u"Gothic Rock", + u"Progressive Rock", + u"Psychedelic Rock", + u"Symphonic Rock", + u"Slow Rock", + u"Big Band", + u"Chorus", + u"Easy Listening", + u"Acoustic", + u"Humour", + u"Speech", + u"Chanson", + u"Opera", + u"Chamber Music", + u"Sonata", + u"Symphony", + u"Booty Bass", + u"Primus", + u"Porn Groove", + u"Satire", + u"Slow Jam", + u"Club", + u"Tango", + u"Samba", + u"Folklore", + u"Ballad", + u"Power Ballad", + u"Rhythmic Soul", + u"Freestyle", + u"Duet", + u"Punk Rock", + u"Drum Solo", + u"A Cappella", + u"Euro-House", + u"Dance Hall", + u"Goa", + u"Drum & Bass", + u"Club-House", + u"Hardcore", + u"Terror", + u"Indie", + u"BritPop", + u"Afro-Punk", + u"Polsk Punk", + u"Beat", + u"Christian Gangsta Rap", + u"Heavy Metal", + u"Black Metal", + u"Crossover", + u"Contemporary Christian", + u"Christian Rock", + u"Merengue", + u"Salsa", + u"Thrash Metal", + u"Anime", + u"JPop", + u"Synthpop", + u"Abstract", + u"Art Rock", + u"Baroque", + u"Bhangra", + u"Big Beat", + u"Breakbeat", + u"Chillout", + u"Downtempo", + u"Dub", + u"EBM", + u"Eclectic", + u"Electro", + u"Electroclash", + u"Emo", + u"Experimental", + u"Garage", + u"Global", + u"IDM", + u"Illbient", + u"Industro-Goth", + u"Jam Band", + u"Krautrock", + 
u"Leftfield", + u"Lounge", + u"Math Rock", + u"New Romantic", + u"Nu-Breakz", + u"Post-Punk", + u"Post-Rock", + u"Psytrance", + u"Shoegaze", + u"Space Rock", + u"Trop Rock", + u"World Music", + u"Neoclassical", + u"Audiobook", + u"Audio Theatre", + u"Neue Deutsche Welle", + u"Podcast", + u"Indie Rock", + u"G-Funk", + u"Dubstep", + u"Garage Rock", + u"Psybient", ] """The ID3v1 genre list.""" diff --git a/lib/mutagen/_file.py b/lib/mutagen/_file.py old mode 100755 new mode 100644 index 1f2b6d14..1c367ffc --- a/lib/mutagen/_file.py +++ b/lib/mutagen/_file.py @@ -9,7 +9,6 @@ import warnings from mutagen._util import DictMixin, loadfile -from mutagen._compat import izip class FileType(DictMixin): @@ -94,10 +93,10 @@ class FileType(DictMixin): if self.tags is None: return [] else: - return list(self.tags.keys()) + return self.tags.keys() @loadfile(writable=True) - def delete(self, filething): + def delete(self, filething=None): """delete(filething=None) Remove tags from a file. @@ -113,14 +112,14 @@ class FileType(DictMixin): Does nothing if the file has no tags. Raises: - MutagenError: if deleting wasn't possible + mutagen.MutagenError: if deleting wasn't possible """ if self.tags is not None: return self.tags.delete(filething) @loadfile(writable=True) - def save(self, filething, **kwargs): + def save(self, filething=None, **kwargs): """save(filething=None, **kwargs) Save metadata tags. @@ -150,14 +149,15 @@ class FileType(DictMixin): """Adds new tags to the file. Raises: - MutagenError: if tags already exist or adding is not possible. + mutagen.MutagenError: + if tags already exist or adding is not possible. """ raise NotImplementedError @property def mime(self): - """A list of mime types (`text`)""" + """A list of mime types (:class:`mutagen.text`)""" mimes = [] for Kind in type(self).__mro__: @@ -171,7 +171,7 @@ class FileType(DictMixin): """Returns a score for how likely the file can be parsed by this type. 
Args: - filename (path): a file path + filename (fspath): a file path fileobj (fileobj): a file object open in rb mode. Position is undefined header (bytes): data of undefined length, starts with the start of @@ -220,13 +220,13 @@ def File(filething, options=None, easy=False): filething (filething) options: Sequence of :class:`FileType` implementations, defaults to all included ones. - easy (bool): If the easy wrappers should be returnd if available. + easy (bool): If the easy wrappers should be returned if available. For example :class:`EasyMP3 ` instead of :class:`MP3 `. Returns: FileType: A FileType instance for the detected type or `None` in case - the type couln't be determined. + the type couldn't be determined. Raises: MutagenError: in case the detected type fails to load the file. @@ -263,12 +263,16 @@ def File(filething, options=None, easy=False): from mutagen.optimfrog import OptimFROG from mutagen.aiff import AIFF from mutagen.aac import AAC + from mutagen.ac3 import AC3 from mutagen.smf import SMF + from mutagen.tak import TAK from mutagen.dsf import DSF + from mutagen.dsdiff import DSDIFF + from mutagen.wave import WAVE options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC, FLAC, AIFF, APEv2File, MP4, ID3FileType, WavPack, - Musepack, MonkeysAudio, OptimFROG, ASF, OggOpus, AAC, - SMF, DSF] + Musepack, MonkeysAudio, OptimFROG, ASF, OggOpus, AAC, AC3, + SMF, TAK, DSF, DSDIFF, WAVE] if not options: return None diff --git a/lib/mutagen/_iff.py b/lib/mutagen/_iff.py new file mode 100644 index 00000000..4aae2466 --- /dev/null +++ b/lib/mutagen/_iff.py @@ -0,0 +1,387 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2014 Evan Purkhiser +# 2014 Ben Ockmore +# 2017 Borewit +# 2019-2020 Philipp Wolfer +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +"""Base classes for various IFF based formats (e.g. AIFF or RIFF).""" + +import sys + +from mutagen.id3 import ID3 +from mutagen.id3._util import ID3NoHeaderError, error as ID3Error +from mutagen._util import ( + MutagenError, + convert_error, + delete_bytes, + insert_bytes, + loadfile, + reraise, + resize_bytes, +) + + +class error(MutagenError): + pass + + +class InvalidChunk(error): + pass + + +class EmptyChunk(InvalidChunk): + pass + + +def is_valid_chunk_id(id): + """ is_valid_chunk_id(FOURCC) + + Arguments: + id (FOURCC) + Returns: + true if valid; otherwise false + + Check if argument id is valid FOURCC type. + """ + + assert isinstance(id, str), \ + 'id is of type %s, must be str: %r' % (type(id), id) + + return ((0 < len(id) <= 4) and (min(id) >= ' ') and + (max(id) <= '~')) + + +# Assert FOURCC formatted valid +def assert_valid_chunk_id(id): + if not is_valid_chunk_id(id): + raise ValueError("IFF chunk ID must be four ASCII characters.") + + +class IffChunk(object): + """Generic representation of a single IFF chunk. + + IFF chunks always consist of an ID followed by the chunk size. The exact + format varies between different IFF based formats, e.g. AIFF uses + big-endian while RIFF uses little-endian. + """ + + # Chunk headers are usually 8 bytes long (4 for ID and 4 for the size) + HEADER_SIZE = 8 + + @classmethod + def parse_header(cls, header): + """Read ID and data_size from the given header. + Must be implemented in subclasses.""" + raise error("Not implemented") + + def write_new_header(self, id_, size): + """Write the chunk header with id_ and size to the file. + Must be implemented in subclasses. The data must be written + to the current position in self._fileobj.""" + raise error("Not implemented") + + def write_size(self): + """Write self.data_size to the file. + Must be implemented in subclasses. 
The data must be written + to the current position in self._fileobj.""" + raise error("Not implemented") + + @classmethod + def get_class(cls, id): + """Returns the class for a new chunk for a given ID. + Can be overridden in subclasses to implement specific chunk types.""" + return cls + + @classmethod + def parse(cls, fileobj, parent_chunk=None): + header = fileobj.read(cls.HEADER_SIZE) + if len(header) < cls.HEADER_SIZE: + raise EmptyChunk('Header size < %i' % cls.HEADER_SIZE) + id, data_size = cls.parse_header(header) + try: + id = id.decode('ascii').rstrip() + except UnicodeDecodeError as e: + raise InvalidChunk(e) + + if not is_valid_chunk_id(id): + raise InvalidChunk('Invalid chunk ID %r' % id) + + return cls.get_class(id)(fileobj, id, data_size, parent_chunk) + + def __init__(self, fileobj, id, data_size, parent_chunk): + self._fileobj = fileobj + self.id = id + self.data_size = data_size + self.parent_chunk = parent_chunk + self.data_offset = fileobj.tell() + self.offset = self.data_offset - self.HEADER_SIZE + self._calculate_size() + + def __repr__(self): + return ("<%s id=%s, offset=%i, size=%i, data_offset=%i, data_size=%i>" + % (type(self).__name__, self.id, self.offset, self.size, + self.data_offset, self.data_size)) + + def read(self): + """Read the chunks data""" + + self._fileobj.seek(self.data_offset) + return self._fileobj.read(self.data_size) + + def write(self, data): + """Write the chunk data""" + + if len(data) > self.data_size: + raise ValueError + + self._fileobj.seek(self.data_offset) + self._fileobj.write(data) + # Write the padding bytes + padding = self.padding() + if padding: + self._fileobj.seek(self.data_offset + self.data_size) + self._fileobj.write(b'\x00' * padding) + + def delete(self): + """Removes the chunk from the file""" + + delete_bytes(self._fileobj, self.size, self.offset) + if self.parent_chunk is not None: + self.parent_chunk._remove_subchunk(self) + self._fileobj.flush() + + def _update_size(self, size_diff, 
changed_subchunk=None): + """Update the size of the chunk""" + + old_size = self.size + self.data_size += size_diff + self._fileobj.seek(self.offset + 4) + self.write_size() + self._calculate_size() + if self.parent_chunk is not None: + self.parent_chunk._update_size(self.size - old_size, self) + if changed_subchunk: + self._update_sibling_offsets( + changed_subchunk, old_size - self.size) + + def _calculate_size(self): + self.size = self.HEADER_SIZE + self.data_size + self.padding() + assert self.size % 2 == 0 + + def resize(self, new_data_size): + """Resize the file and update the chunk sizes""" + + padding = new_data_size % 2 + resize_bytes(self._fileobj, self.data_size + self.padding(), + new_data_size + padding, self.data_offset) + size_diff = new_data_size - self.data_size + self._update_size(size_diff) + self._fileobj.flush() + + def padding(self): + """Returns the number of padding bytes (0 or 1). + IFF chunks are required to be a even number in total length. If + data_size is odd a padding byte will be added at the end. + """ + return self.data_size % 2 + + +class IffContainerChunkMixin(): + """A IFF chunk containing other chunks. + + A container chunk can have an additional name as the first 4 bytes of the + chunk data followed by an arbitrary number of subchunks. The root chunk of + the file is always a container chunk (e.g. the AIFF chunk or the FORM chunk + for RIFF) but there can be other types of container chunks (e.g. the LIST + chunks used in RIFF). 
+ """ + + def parse_next_subchunk(self): + """""" + raise error("Not implemented") + + def init_container(self, name_size=4): + # Lists can store an additional name identifier before the subchunks + self.__name_size = name_size + if self.data_size < name_size: + raise InvalidChunk( + 'Container chunk data size < %i' % name_size) + + # Read the container name + if name_size > 0: + try: + self.name = self._fileobj.read(name_size).decode('ascii') + except UnicodeDecodeError as e: + raise error(e) + else: + self.name = None + + # Load all IFF subchunks + self.__subchunks = [] + + def subchunks(self): + """Returns a list of all subchunks. + The list is lazily loaded on first access. + """ + if not self.__subchunks: + next_offset = self.data_offset + self.__name_size + while next_offset < self.offset + self.size: + self._fileobj.seek(next_offset) + try: + chunk = self.parse_next_subchunk() + except EmptyChunk: + break + except InvalidChunk: + break + self.__subchunks.append(chunk) + + # Calculate the location of the next chunk + next_offset = chunk.offset + chunk.size + return self.__subchunks + + def insert_chunk(self, id_, data=None): + """Insert a new chunk at the end of the container chunk""" + + if not is_valid_chunk_id(id_): + raise KeyError("Invalid IFF key.") + + next_offset = self.offset + self.size + size = self.HEADER_SIZE + data_size = 0 + if data: + data_size = len(data) + padding = data_size % 2 + size += data_size + padding + insert_bytes(self._fileobj, size, next_offset) + self._fileobj.seek(next_offset) + self.write_new_header(id_.ljust(4).encode('ascii'), data_size) + self._fileobj.seek(next_offset) + chunk = self.parse_next_subchunk() + self._update_size(chunk.size) + if data: + chunk.write(data) + self.subchunks().append(chunk) + self._fileobj.flush() + return chunk + + def __contains__(self, id_): + """Check if this chunk contains a specific subchunk.""" + assert_valid_chunk_id(id_) + try: + self[id_] + return True + except KeyError: + return False + 
+ def __getitem__(self, id_): + """Get a subchunk by ID.""" + assert_valid_chunk_id(id_) + found_chunk = None + for chunk in self.subchunks(): + if chunk.id == id_: + found_chunk = chunk + break + else: + raise KeyError("No %r chunk found" % id_) + return found_chunk + + def __delitem__(self, id_): + """Remove a chunk from the IFF file""" + assert_valid_chunk_id(id_) + self[id_].delete() + + def _remove_subchunk(self, chunk): + assert chunk in self.__subchunks + self._update_size(-chunk.size, chunk) + self.__subchunks.remove(chunk) + + def _update_sibling_offsets(self, changed_subchunk, size_diff): + """Update the offsets of subchunks after `changed_subchunk`. + """ + index = self.__subchunks.index(changed_subchunk) + sibling_chunks = self.__subchunks[index + 1:len(self.__subchunks)] + for sibling in sibling_chunks: + sibling.offset -= size_diff + sibling.data_offset -= size_diff + + +class IffFile: + """Representation of a IFF file""" + + def __init__(self, chunk_cls, fileobj): + fileobj.seek(0) + self.root = chunk_cls.parse(fileobj) + + def __contains__(self, id_): + """Check if the IFF file contains a specific chunk""" + return id_ in self.root + + def __getitem__(self, id_): + """Get a chunk from the IFF file""" + return self.root[id_] + + def __delitem__(self, id_): + """Remove a chunk from the IFF file""" + self.delete_chunk(id_) + + def delete_chunk(self, id_): + """Remove a chunk from the IFF file""" + del self.root[id_] + + def insert_chunk(self, id_, data=None): + """Insert a new chunk at the end of the IFF file""" + return self.root.insert_chunk(id_, data) + + +class IffID3(ID3): + """A generic IFF file with ID3v2 tags""" + + def _load_file(self, fileobj): + raise error("Not implemented") + + def _pre_load_header(self, fileobj): + try: + fileobj.seek(self._load_file(fileobj)['ID3'].data_offset) + except (InvalidChunk, KeyError): + raise ID3NoHeaderError("No ID3 chunk") + + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, 
filething=None, v2_version=4, v23_sep='/', padding=None): + """Save ID3v2 data to the IFF file""" + + fileobj = filething.fileobj + + iff_file = self._load_file(fileobj) + + if 'ID3' not in iff_file: + iff_file.insert_chunk('ID3') + + chunk = iff_file['ID3'] + + try: + data = self._prepare_data( + fileobj, chunk.data_offset, chunk.data_size, v2_version, + v23_sep, padding) + except ID3Error as e: + reraise(error, e, sys.exc_info()[2]) + + chunk.resize(len(data)) + chunk.write(data) + + @convert_error(IOError, error) + @loadfile(writable=True) + def delete(self, filething=None): + """Completely removes the ID3 chunk from the IFF file""" + + try: + iff_file = self._load_file(filething.fileobj) + del iff_file['ID3'] + except KeyError: + pass + self.clear() diff --git a/lib/mutagen/_riff.py b/lib/mutagen/_riff.py new file mode 100644 index 00000000..3afe0623 --- /dev/null +++ b/lib/mutagen/_riff.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2017 Borewit +# Copyright (C) 2019-2020 Philipp Wolfer +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""Resource Interchange File Format (RIFF).""" + +import struct +from struct import pack + +from mutagen._iff import ( + IffChunk, + IffContainerChunkMixin, + IffFile, + InvalidChunk, +) + + +class RiffChunk(IffChunk): + """Generic RIFF chunk""" + + @classmethod + def parse_header(cls, header): + return struct.unpack('<4sI', header) + + @classmethod + def get_class(cls, id): + if id in (u'LIST', u'RIFF'): + return RiffListChunk + else: + return cls + + def write_new_header(self, id_, size): + self._fileobj.write(pack('<4sI', id_, size)) + + def write_size(self): + self._fileobj.write(pack(' None: + ... 
+ +_fsnative = Union[fsnative, _base] + +if sys.platform == "win32": + _bytes_default_encoding = str +else: + _bytes_default_encoding = Optional[str] + +def path2fsn(path: _pathlike) -> _fsnative: + ... + +def fsn2text(path: _fsnative, strict: bool=False) -> Text: + ... + +def text2fsn(text: Text) -> _fsnative: + ... + +def fsn2bytes(path: _fsnative, encoding: _bytes_default_encoding="utf-8") -> bytes: + ... + +def bytes2fsn(data: bytes, encoding: _bytes_default_encoding="utf-8") -> _fsnative: + ... + +def uri2fsn(uri: _uri) -> _fsnative: + ... + +def fsn2uri(path: _fsnative) -> Text: + ... + +def fsn2norm(path: _fsnative) -> _fsnative: + ... + +sep: _fsnative +pathsep: _fsnative +curdir: _fsnative +pardir: _fsnative +altsep: _fsnative +extsep: _fsnative +devnull: _fsnative +defpath: _fsnative + +def getcwd() -> _fsnative: + ... + +def getenv(key: _pathlike, value: Optional[_fsnative]=None) -> Optional[_fsnative]: + ... + +def putenv(key: _pathlike, value: _pathlike): + ... + +def unsetenv(key: _pathlike) -> None: + ... + +def supports_ansi_escape_codes(fd: int) -> bool: + ... + +def expandvars(path: _pathlike) -> _fsnative: + ... + +def expanduser(path: _pathlike) -> _fsnative: + ... + +environ: Dict[_fsnative,_fsnative] +argv: List[_fsnative] + +def gettempdir() -> _fsnative: + pass + +def mkstemp(suffix: Optional[_pathlike]=None, prefix: Optional[_pathlike]=None, dir: Optional[_pathlike]=None, text: bool=False) -> Tuple[int, _fsnative]: + ... + +def mkdtemp(suffix: Optional[_pathlike]=None, prefix: Optional[_pathlike]=None, dir: Optional[_pathlike]=None) -> _fsnative: + ... + +version_string: str + +version: Tuple[int, int, int] + +print_ = print + +def input_(prompt: Any=None) -> _fsnative: + ... 
diff --git a/lib/mutagen/_senf/_argv.py b/lib/mutagen/_senf/_argv.py old mode 100755 new mode 100644 index 56b1d416..ff5f9abd --- a/lib/mutagen/_senf/_argv.py +++ b/lib/mutagen/_senf/_argv.py @@ -9,12 +9,23 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import sys import ctypes -import collections +try: + from collections import abc +except ImportError: + import collections as abc # type: ignore from functools import total_ordering from ._compat import PY2, string_types @@ -49,7 +60,7 @@ def _get_win_argv(): @total_ordering -class Argv(collections.MutableSequence): +class Argv(abc.MutableSequence): """List[`fsnative`]: Like `sys.argv` but contains unicode keys and values under Windows + Python 2. diff --git a/lib/mutagen/_senf/_compat.py b/lib/mutagen/_senf/_compat.py old mode 100755 new mode 100644 index 4de1ef95..bf1cb304 --- a/lib/mutagen/_senf/_compat.py +++ b/lib/mutagen/_senf/_compat.py @@ -9,8 +9,16 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
+# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import sys @@ -20,26 +28,23 @@ PY3 = not PY2 if PY2: - from urllib.parse import urlparse, urlunparse + from urlparse import urlparse, urlunparse urlparse, urlunparse - from urllib.request import pathname2url, url2pathname - from urllib.parse import quote, unquote - pathname2url, url2pathname, quote, unquote + from urllib import quote, unquote + quote, unquote - from io import StringIO + from StringIO import StringIO BytesIO = StringIO from io import StringIO as TextIO TextIO - string_types = (str, str) - text_type = str + string_types = (str, unicode) + text_type = unicode - iteritems = lambda d: iter(d.items()) + iteritems = lambda d: d.iteritems() elif PY3: from urllib.parse import urlparse, quote, unquote, urlunparse urlparse, quote, unquote, urlunparse - from urllib.request import pathname2url, url2pathname - pathname2url, url2pathname from io import StringIO StringIO = StringIO @@ -50,4 +55,4 @@ elif PY3: string_types = (str,) text_type = str - iteritems = lambda d: iter(list(d.items())) + iteritems = lambda d: iter(d.items()) diff --git a/lib/mutagen/_senf/_environ.py b/lib/mutagen/_senf/_environ.py old mode 100755 new mode 100644 index 760bf19d..82536935 --- a/lib/mutagen/_senf/_environ.py +++ b/lib/mutagen/_senf/_environ.py @@ -9,12 +9,23 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The 
above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import os import ctypes -import collections +try: + from collections import abc +except ImportError: + import collections as abc # type: ignore from ._compat import text_type, PY2 from ._fsnative import path2fsn, is_win, _fsn2legacy, fsnative @@ -86,23 +97,23 @@ def read_windows_environ(): res = ctypes.cast(res, ctypes.POINTER(ctypes.c_wchar)) done = [] - current = "" + current = u"" i = 0 while 1: c = res[i] i += 1 - if c == "\x00": + if c == u"\x00": if not current: break done.append(current) - current = "" + current = u"" continue current += c dict_ = {} for entry in done: try: - key, value = entry.split("=", 1) + key, value = entry.split(u"=", 1) except ValueError: continue key = _norm_key(key) @@ -122,7 +133,7 @@ def _norm_key(key): return key -class Environ(collections.MutableMapping): +class Environ(abc.MutableMapping): """Dict[`fsnative`, `fsnative`]: Like `os.environ` but contains unicode keys and values under Windows + Python 2. 
diff --git a/lib/mutagen/_senf/_fsnative.py b/lib/mutagen/_senf/_fsnative.py old mode 100755 new mode 100644 index 76626f3c..7cc4ddc1 --- a/lib/mutagen/_senf/_fsnative.py +++ b/lib/mutagen/_senf/_fsnative.py @@ -9,8 +9,16 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import os import sys @@ -18,8 +26,7 @@ import ctypes import codecs from . import _winapi as winapi -from ._compat import text_type, PY3, PY2, url2pathname, urlparse, quote, \ - unquote, urlunparse +from ._compat import text_type, PY3, PY2, urlparse, quote, unquote, urlunparse is_win = os.name == "nt" @@ -49,45 +56,9 @@ def _swap_bytes(data): return bytes(data) -def _codec_fails_on_encode_surrogates(codec, _cache={}): - """Returns if a codec fails correctly when passing in surrogates with - a surrogatepass/surrogateescape error handler. 
Some codecs were broken - in Python <3.4 - """ - - try: - return _cache[codec] - except KeyError: - try: - "\uD800\uDC01".encode(codec) - except UnicodeEncodeError: - _cache[codec] = True - else: - _cache[codec] = False - return _cache[codec] - - -def _codec_can_decode_with_surrogatepass(codec, _cache={}): - """Returns if a codec supports the surrogatepass error handler when - decoding. Some codecs were broken in Python <3.4 - """ - - try: - return _cache[codec] - except KeyError: - try: - "\ud83d".encode( - codec, _surrogatepass).decode(codec, _surrogatepass) - except UnicodeDecodeError: - _cache[codec] = False - else: - _cache[codec] = True - return _cache[codec] - - -def _bytes2winpath(data, codec): +def _decode_surrogatepass(data, codec): """Like data.decode(codec, 'surrogatepass') but makes utf-16-le/be work - on Python < 3.4 + Windows + on Python 2. https://bugs.python.org/issue27971 @@ -97,7 +68,7 @@ def _bytes2winpath(data, codec): try: return data.decode(codec, _surrogatepass) except UnicodeDecodeError: - if not _codec_can_decode_with_surrogatepass(codec): + if PY2: if _normalize_codec(codec) == "utf-16-be": data = _swap_bytes(data) codec = "utf-16-le" @@ -113,30 +84,45 @@ def _bytes2winpath(data, codec): raise -def _winpath2bytes_py3(text, codec): - """Fallback implementation for text including surrogates""" +def _merge_surrogates(text): + """Returns a copy of the text with all surrogate pairs merged""" - # merge surrogate codepoints - if _normalize_codec(codec).startswith("utf-16"): - # fast path, utf-16 merges anyway - return text.encode(codec, _surrogatepass) - return _bytes2winpath( + return _decode_surrogatepass( text.encode("utf-16-le", _surrogatepass), - "utf-16-le").encode(codec, _surrogatepass) + "utf-16-le") -if PY2: - def _winpath2bytes(text, codec): - return text.encode(codec) -else: - def _winpath2bytes(text, codec): - if _codec_fails_on_encode_surrogates(codec): - try: - return text.encode(codec) - except UnicodeEncodeError: - return 
_winpath2bytes_py3(text, codec) - else: - return _winpath2bytes_py3(text, codec) +def fsn2norm(path): + """ + Args: + path (fsnative): The path to normalize + Returns: + `fsnative` + + Normalizes an fsnative path. + + The same underlying path can have multiple representations as fsnative + (due to surrogate pairs and variable length encodings). When concatenating + fsnative the result might be different than concatenating the serialized + form and then deserializing it. + + This returns the normalized form i.e. the form which os.listdir() would + return. This is useful when you alter fsnative but require that the same + underlying path always maps to the same fsnative value. + + All functions like :func:`bytes2fsn`, :func:`fsnative`, :func:`text2fsn` + and :func:`path2fsn` always return a normalized path, independent of their + input. + """ + + native = _fsn2native(path) + + if is_win: + return _merge_surrogates(native) + elif PY3: + return bytes2fsn(native, None) + else: + return path def _fsn2legacy(path): @@ -173,14 +159,15 @@ def _fsnative(text): path = text.encode("utf-8", _surrogatepass) if b"\x00" in path: - path = path.replace(b"\x00", fsn2bytes(_fsnative("\uFFFD"), None)) + path = path.replace(b"\x00", fsn2bytes(_fsnative(u"\uFFFD"), None)) if PY3: return path.decode(_encoding, "surrogateescape") return path else: - if "\x00" in text: - text = text.replace("\x00", "\uFFFD") + if u"\x00" in text: + text = text.replace(u"\x00", u"\uFFFD") + text = fsn2norm(text) return text @@ -235,7 +222,7 @@ def _create_fsnative(type_): the `str` only contains ASCII and no NULL. 
""" - def __new__(cls, text=""): + def __new__(cls, text=u""): return _fsnative(text) new_type = meta("fsnative", (object,), dict(impl.__dict__)) @@ -259,10 +246,10 @@ def _typecheck_fsnative(path): return False if PY3 or is_win: - if "\x00" in path: + if u"\x00" in path: return False - if is_unix and not _is_unicode_encoding: + if is_unix: try: path.encode(_encoding, "surrogateescape") except UnicodeEncodeError: @@ -297,7 +284,6 @@ def _fsn2native(path): try: path = path.encode(_encoding, "surrogateescape") except UnicodeEncodeError: - assert not _is_unicode_encoding # This look more like ValueError, but raising only one error # makes things simpler... also one could say str + surrogates # is its own type @@ -309,7 +295,7 @@ def _fsn2native(path): if b"\x00" in path: raise TypeError("fsnative can't contain nulls") else: - if "\x00" in path: + if u"\x00" in path: raise TypeError("fsnative can't contain nulls") return path @@ -331,7 +317,6 @@ def _get_encoding(): _encoding = _get_encoding() -_is_unicode_encoding = _encoding.startswith("utf") def path2fsn(path): @@ -369,9 +354,11 @@ def path2fsn(path): data = path.encode(_encoding, "surrogateescape") if b"\x00" in data: raise ValueError("embedded null") + path = fsn2norm(path) else: - if "\x00" in path: + if u"\x00" in path: raise ValueError("embedded null") + path = fsn2norm(path) if not isinstance(path, fsnative_type): raise TypeError("path needs to be %s", fsnative_type.__name__) @@ -430,22 +417,21 @@ def text2fsn(text): return fsnative(text) -def fsn2bytes(path, encoding): +def fsn2bytes(path, encoding="utf-8"): """ Args: path (fsnative): The path to convert - encoding (`str` or `None`): `None` if you don't care about Windows + encoding (`str`): encoding used for Windows Returns: `bytes` Raises: TypeError: If no `fsnative` path is passed - ValueError: If encoding fails or no encoding is given + ValueError: If encoding fails or the encoding is invalid Converts a `fsnative` path to `bytes`. 
The passed *encoding* is only used on platforms where paths are not - associated with an encoding (Windows for example). If you don't care about - Windows you can pass `None`. + associated with an encoding (Windows for example). For Windows paths, lone surrogates will be encoded like normal code points and surrogate pairs will be merged before encoding. In case of ``utf-8`` @@ -459,30 +445,45 @@ def fsn2bytes(path, encoding): if encoding is None: raise ValueError("invalid encoding %r" % encoding) - try: - return _winpath2bytes(path, encoding) - except LookupError: - raise ValueError("invalid encoding %r" % encoding) + if PY2: + try: + return path.encode(encoding) + except LookupError: + raise ValueError("invalid encoding %r" % encoding) + else: + try: + return path.encode(encoding) + except LookupError: + raise ValueError("invalid encoding %r" % encoding) + except UnicodeEncodeError: + # Fallback implementation for text including surrogates + # merge surrogate codepoints + if _normalize_codec(encoding).startswith("utf-16"): + # fast path, utf-16 merges anyway + return path.encode(encoding, _surrogatepass) + return _merge_surrogates(path).encode(encoding, _surrogatepass) else: return path -def bytes2fsn(data, encoding): +def bytes2fsn(data, encoding="utf-8"): """ Args: data (bytes): The data to convert - encoding (`str` or `None`): `None` if you don't care about Windows + encoding (`str`): encoding used for Windows Returns: `fsnative` Raises: TypeError: If no `bytes` path is passed - ValueError: If decoding fails or no encoding is given + ValueError: If decoding fails or the encoding is invalid Turns `bytes` to a `fsnative` path. The passed *encoding* is only used on platforms where paths are not - associated with an encoding (Windows for example). If you don't care about - Windows you can pass `None`. + associated with an encoding (Windows for example). 
+ + For Windows paths ``WTF-8`` is accepted if ``utf-8`` is used and + ``WTF-16`` accepted if ``utf-16-le`` is used. """ if not isinstance(data, bytes): @@ -492,10 +493,10 @@ def bytes2fsn(data, encoding): if encoding is None: raise ValueError("invalid encoding %r" % encoding) try: - path = _bytes2winpath(data, encoding) + path = _decode_surrogatepass(data, encoding) except LookupError: raise ValueError("invalid encoding %r" % encoding) - if "\x00" in path: + if u"\x00" in path: raise ValueError("contains nulls") return path else: @@ -543,20 +544,32 @@ def uri2fsn(uri): uri = urlunparse(parsed)[7:] if is_win: - path = url2pathname(uri) + try: + drive, rest = uri.split(":", 1) + except ValueError: + path = "" + rest = uri.replace("/", "\\") + else: + path = drive[-1] + ":" + rest = rest.replace("/", "\\") + if PY2: + path += unquote(rest) + else: + path += unquote(rest, encoding="utf-8", errors="surrogatepass") if netloc: path = "\\\\" + path if PY2: path = path.decode("utf-8") - if "\x00" in path: + if u"\x00" in path: raise ValueError("embedded null") return path else: - path = url2pathname(uri) + if PY2: + path = unquote(uri) + else: + path = unquote(uri, encoding=_encoding, errors="surrogateescape") if "\x00" in path: raise ValueError("embedded null") - if PY3: - path = fsnative(path) return path @@ -594,6 +607,8 @@ def fsn2uri(path): except WindowsError as e: raise ValueError(e) uri = buf[:length.value] + # https://bitbucket.org/pypy/pypy/issues/3133 + uri = _merge_surrogates(uri) # For some reason UrlCreateFromPathW escapes some chars outside of # ASCII and some not. Unquote and re-quote with utf-8. 
@@ -607,4 +622,4 @@ def fsn2uri(path): return _quote_path(uri.encode("utf-8", _surrogatepass)) else: - return "file://" + _quote_path(path) + return u"file://" + _quote_path(path) diff --git a/lib/mutagen/_senf/_print.py b/lib/mutagen/_senf/_print.py old mode 100755 new mode 100644 index ca19279a..63c50fa5 --- a/lib/mutagen/_senf/_print.py +++ b/lib/mutagen/_senf/_print.py @@ -9,14 +9,23 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import sys import os import ctypes +import re -from ._fsnative import _encoding, is_win, is_unix, _surrogatepass +from ._fsnative import _encoding, is_win, is_unix, _surrogatepass, bytes2fsn from ._compat import text_type, PY2, PY3 from ._winansi import AnsiState, ansi_split from . 
import _winapi as winapi @@ -155,7 +164,7 @@ def _print_windows(objects, sep, end, file, flush): if not isinstance(end, text_type): raise TypeError - if end == "\n": + if end == u"\n": end = os.linesep text = sep.join(parts) + end @@ -225,7 +234,7 @@ def _readline_windows(): buf = ctypes.create_string_buffer(buf_size * ctypes.sizeof(winapi.WCHAR)) read = winapi.DWORD() - text = "" + text = u"" while True: if winapi.ReadConsoleW( h, buf, buf_size, ctypes.byref(read), None) == 0: @@ -234,7 +243,7 @@ def _readline_windows(): raise ctypes.WinError() data = buf[:read.value * ctypes.sizeof(winapi.WCHAR)] text += data.decode("utf-16-le", _surrogatepass) - if text.endswith("\r\n"): + if text.endswith(u"\r\n"): return text[:-2] @@ -253,7 +262,7 @@ def _decode_codepage(codepage, data): assert isinstance(data, bytes) if not data: - return "" + return u"" # get the required buffer length first length = winapi.MultiByteToWideChar(codepage, 0, data, len(data), None, 0) @@ -351,3 +360,65 @@ def input_(prompt=None): print_(prompt, end="") return _readline() + + +def _get_file_name_for_handle(handle): + """(Windows only) Returns a file name for a file handle. + + Args: + handle (winapi.HANDLE) + Returns: + `text` or `None` if no file name could be retrieved. 
+ """ + + assert is_win + assert handle != winapi.INVALID_HANDLE_VALUE + + size = winapi.FILE_NAME_INFO.FileName.offset + \ + winapi.MAX_PATH * ctypes.sizeof(winapi.WCHAR) + buf = ctypes.create_string_buffer(size) + + if winapi.GetFileInformationByHandleEx is None: + # Windows XP + return None + + status = winapi.GetFileInformationByHandleEx( + handle, winapi.FileNameInfo, buf, size) + if status == 0: + return None + + name_info = ctypes.cast( + buf, ctypes.POINTER(winapi.FILE_NAME_INFO)).contents + offset = winapi.FILE_NAME_INFO.FileName.offset + data = buf[offset:offset + name_info.FileNameLength] + return bytes2fsn(data, "utf-16-le") + + +def supports_ansi_escape_codes(fd): + """Returns whether the output device is capable of interpreting ANSI escape + codes when :func:`print_` is used. + + Args: + fd (int): file descriptor (e.g. ``sys.stdout.fileno()``) + Returns: + `bool` + """ + + if os.isatty(fd): + return True + + if not is_win: + return False + + # Check for cygwin/msys terminal + handle = winapi._get_osfhandle(fd) + if handle == winapi.INVALID_HANDLE_VALUE: + return False + + if winapi.GetFileType(handle) != winapi.FILE_TYPE_PIPE: + return False + + file_name = _get_file_name_for_handle(handle) + match = re.match( + "^\\\\(cygwin|msys)-[a-z0-9]+-pty[0-9]+-(from|to)-master$", file_name) + return match is not None diff --git a/lib/mutagen/_senf/_stdlib.py b/lib/mutagen/_senf/_stdlib.py old mode 100755 new mode 100644 index bfaf271c..f3193d33 --- a/lib/mutagen/_senf/_stdlib.py +++ b/lib/mutagen/_senf/_stdlib.py @@ -9,8 +9,16 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import re import os @@ -38,7 +46,7 @@ def getcwd(): """ if is_win and PY2: - return os.getcwd() + return os.getcwdu() return os.getcwd() diff --git a/lib/mutagen/_senf/_temp.py b/lib/mutagen/_senf/_temp.py old mode 100755 new mode 100644 index ac44dfba..d29b7217 --- a/lib/mutagen/_senf/_temp.py +++ b/lib/mutagen/_senf/_temp.py @@ -9,8 +9,16 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
import tempfile diff --git a/lib/mutagen/_senf/_winansi.py b/lib/mutagen/_senf/_winansi.py old mode 100755 new mode 100644 index e6f65537..fbbc1c22 --- a/lib/mutagen/_senf/_winansi.py +++ b/lib/mutagen/_senf/_winansi.py @@ -9,8 +9,16 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import ctypes import re @@ -25,7 +33,7 @@ def ansi_parse(code): return code[-1:], tuple([int(v or "0") for v in code[2:-1].split(";")]) -def ansi_split(text, _re=re.compile("(\x1b\[(\d*;?)*\S)")): +def ansi_split(text, _re=re.compile(u"(\x1b\\[(\\d*;?)*\\S)")): """Yields (is_ansi, text)""" for part in _re.split(text): diff --git a/lib/mutagen/_senf/_winapi.py b/lib/mutagen/_senf/_winapi.py old mode 100755 new mode 100644 index effb85bb..6a63ea35 --- a/lib/mutagen/_senf/_winapi.py +++ b/lib/mutagen/_senf/_winapi.py @@ -9,175 +9,213 @@ # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import sys import ctypes -from ctypes import WinDLL, wintypes +if sys.platform == 'win32': + from ctypes import WinDLL, CDLL, wintypes -shell32 = WinDLL("shell32") -kernel32 = WinDLL("kernel32") -shlwapi = WinDLL("shlwapi") + shell32 = WinDLL("shell32") + kernel32 = WinDLL("kernel32") + shlwapi = WinDLL("shlwapi") + msvcrt = CDLL("msvcrt") -GetCommandLineW = kernel32.GetCommandLineW -GetCommandLineW.argtypes = [] -GetCommandLineW.restype = wintypes.LPCWSTR + GetCommandLineW = kernel32.GetCommandLineW + GetCommandLineW.argtypes = [] + GetCommandLineW.restype = wintypes.LPCWSTR -CommandLineToArgvW = shell32.CommandLineToArgvW -CommandLineToArgvW.argtypes = [ - wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_int)] -CommandLineToArgvW.restype = ctypes.POINTER(wintypes.LPWSTR) + CommandLineToArgvW = shell32.CommandLineToArgvW + CommandLineToArgvW.argtypes = [ + wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_int)] + CommandLineToArgvW.restype = ctypes.POINTER(wintypes.LPWSTR) -LocalFree = kernel32.LocalFree -LocalFree.argtypes = [wintypes.HLOCAL] -LocalFree.restype = wintypes.HLOCAL + LocalFree = kernel32.LocalFree + LocalFree.argtypes = [wintypes.HLOCAL] + LocalFree.restype = wintypes.HLOCAL -# https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751.aspx -LPCTSTR = ctypes.c_wchar_p -LPWSTR = wintypes.LPWSTR -LPCWSTR = ctypes.c_wchar_p -LPTSTR = LPWSTR -PCWSTR = ctypes.c_wchar_p -PCTSTR = PCWSTR -PWSTR = ctypes.c_wchar_p -PTSTR = PWSTR -LPVOID = wintypes.LPVOID -WCHAR = wintypes.WCHAR -LPSTR = 
ctypes.c_char_p + # https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751.aspx + LPCTSTR = ctypes.c_wchar_p + LPWSTR = wintypes.LPWSTR + LPCWSTR = ctypes.c_wchar_p + LPTSTR = LPWSTR + PCWSTR = ctypes.c_wchar_p + PCTSTR = PCWSTR + PWSTR = ctypes.c_wchar_p + PTSTR = PWSTR + LPVOID = wintypes.LPVOID + WCHAR = wintypes.WCHAR + LPSTR = ctypes.c_char_p -BOOL = wintypes.BOOL -LPBOOL = ctypes.POINTER(BOOL) -UINT = wintypes.UINT -WORD = wintypes.WORD -DWORD = wintypes.DWORD -SHORT = wintypes.SHORT -HANDLE = wintypes.HANDLE -ULONG = wintypes.ULONG -LPCSTR = wintypes.LPCSTR + BOOL = wintypes.BOOL + LPBOOL = ctypes.POINTER(BOOL) + UINT = wintypes.UINT + WORD = wintypes.WORD + DWORD = wintypes.DWORD + SHORT = wintypes.SHORT + HANDLE = wintypes.HANDLE + ULONG = wintypes.ULONG + LPCSTR = wintypes.LPCSTR -STD_INPUT_HANDLE = DWORD(-10) -STD_OUTPUT_HANDLE = DWORD(-11) -STD_ERROR_HANDLE = DWORD(-12) + STD_INPUT_HANDLE = DWORD(-10) + STD_OUTPUT_HANDLE = DWORD(-11) + STD_ERROR_HANDLE = DWORD(-12) -INVALID_HANDLE_VALUE = wintypes.HANDLE(-1).value + INVALID_HANDLE_VALUE = wintypes.HANDLE(-1).value -INTERNET_MAX_SCHEME_LENGTH = 32 -INTERNET_MAX_PATH_LENGTH = 2048 -INTERNET_MAX_URL_LENGTH = ( - INTERNET_MAX_SCHEME_LENGTH + len("://") + INTERNET_MAX_PATH_LENGTH) + INTERNET_MAX_SCHEME_LENGTH = 32 + INTERNET_MAX_PATH_LENGTH = 2048 + INTERNET_MAX_URL_LENGTH = ( + INTERNET_MAX_SCHEME_LENGTH + len("://") + INTERNET_MAX_PATH_LENGTH) -FOREGROUND_BLUE = 0x0001 -FOREGROUND_GREEN = 0x0002 -FOREGROUND_RED = 0x0004 -FOREGROUND_INTENSITY = 0x0008 + FOREGROUND_BLUE = 0x0001 + FOREGROUND_GREEN = 0x0002 + FOREGROUND_RED = 0x0004 + FOREGROUND_INTENSITY = 0x0008 -BACKGROUND_BLUE = 0x0010 -BACKGROUND_GREEN = 0x0020 -BACKGROUND_RED = 0x0040 -BACKGROUND_INTENSITY = 0x0080 + BACKGROUND_BLUE = 0x0010 + BACKGROUND_GREEN = 0x0020 + BACKGROUND_RED = 0x0040 + BACKGROUND_INTENSITY = 0x0080 -COMMON_LVB_REVERSE_VIDEO = 0x4000 -COMMON_LVB_UNDERSCORE = 0x8000 + COMMON_LVB_REVERSE_VIDEO = 0x4000 + 
COMMON_LVB_UNDERSCORE = 0x8000 -UrlCreateFromPathW = shlwapi.UrlCreateFromPathW -UrlCreateFromPathW.argtypes = [ - PCTSTR, PTSTR, ctypes.POINTER(DWORD), DWORD] -UrlCreateFromPathW.restype = ctypes.HRESULT + UrlCreateFromPathW = shlwapi.UrlCreateFromPathW + UrlCreateFromPathW.argtypes = [ + PCTSTR, PTSTR, ctypes.POINTER(DWORD), DWORD] + UrlCreateFromPathW.restype = ctypes.HRESULT -SetEnvironmentVariableW = kernel32.SetEnvironmentVariableW -SetEnvironmentVariableW.argtypes = [LPCTSTR, LPCTSTR] -SetEnvironmentVariableW.restype = wintypes.BOOL + SetEnvironmentVariableW = kernel32.SetEnvironmentVariableW + SetEnvironmentVariableW.argtypes = [LPCTSTR, LPCTSTR] + SetEnvironmentVariableW.restype = wintypes.BOOL -GetEnvironmentVariableW = kernel32.GetEnvironmentVariableW -GetEnvironmentVariableW.argtypes = [LPCTSTR, LPTSTR, DWORD] -GetEnvironmentVariableW.restype = DWORD + GetEnvironmentVariableW = kernel32.GetEnvironmentVariableW + GetEnvironmentVariableW.argtypes = [LPCTSTR, LPTSTR, DWORD] + GetEnvironmentVariableW.restype = DWORD -GetEnvironmentStringsW = kernel32.GetEnvironmentStringsW -GetEnvironmentStringsW.argtypes = [] -GetEnvironmentStringsW.restype = ctypes.c_void_p + GetEnvironmentStringsW = kernel32.GetEnvironmentStringsW + GetEnvironmentStringsW.argtypes = [] + GetEnvironmentStringsW.restype = ctypes.c_void_p -FreeEnvironmentStringsW = kernel32.FreeEnvironmentStringsW -FreeEnvironmentStringsW.argtypes = [ctypes.c_void_p] -FreeEnvironmentStringsW.restype = ctypes.c_bool + FreeEnvironmentStringsW = kernel32.FreeEnvironmentStringsW + FreeEnvironmentStringsW.argtypes = [ctypes.c_void_p] + FreeEnvironmentStringsW.restype = ctypes.c_bool -GetStdHandle = kernel32.GetStdHandle -GetStdHandle.argtypes = [DWORD] -GetStdHandle.restype = HANDLE + GetStdHandle = kernel32.GetStdHandle + GetStdHandle.argtypes = [DWORD] + GetStdHandle.restype = HANDLE + class COORD(ctypes.Structure): -class COORD(ctypes.Structure): + _fields_ = [ + ("X", SHORT), + ("Y", SHORT), + ] - _fields_ = 
[ - ("X", SHORT), - ("Y", SHORT), - ] + class SMALL_RECT(ctypes.Structure): + _fields_ = [ + ("Left", SHORT), + ("Top", SHORT), + ("Right", SHORT), + ("Bottom", SHORT), + ] -class SMALL_RECT(ctypes.Structure): + class CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure): - _fields_ = [ - ("Left", SHORT), - ("Top", SHORT), - ("Right", SHORT), - ("Bottom", SHORT), - ] + _fields_ = [ + ("dwSize", COORD), + ("dwCursorPosition", COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", COORD), + ] + GetConsoleScreenBufferInfo = kernel32.GetConsoleScreenBufferInfo + GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO)] + GetConsoleScreenBufferInfo.restype = BOOL -class CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure): + GetConsoleOutputCP = kernel32.GetConsoleOutputCP + GetConsoleOutputCP.argtypes = [] + GetConsoleOutputCP.restype = UINT - _fields_ = [ - ("dwSize", COORD), - ("dwCursorPosition", COORD), - ("wAttributes", WORD), - ("srWindow", SMALL_RECT), - ("dwMaximumWindowSize", COORD), - ] + SetConsoleOutputCP = kernel32.SetConsoleOutputCP + SetConsoleOutputCP.argtypes = [UINT] + SetConsoleOutputCP.restype = BOOL + GetConsoleCP = kernel32.GetConsoleCP + GetConsoleCP.argtypes = [] + GetConsoleCP.restype = UINT -GetConsoleScreenBufferInfo = kernel32.GetConsoleScreenBufferInfo -GetConsoleScreenBufferInfo.argtypes = [ - HANDLE, ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO)] -GetConsoleScreenBufferInfo.restype = BOOL + SetConsoleCP = kernel32.SetConsoleCP + SetConsoleCP.argtypes = [UINT] + SetConsoleCP.restype = BOOL -GetConsoleOutputCP = kernel32.GetConsoleOutputCP -GetConsoleOutputCP.argtypes = [] -GetConsoleOutputCP.restype = UINT + SetConsoleTextAttribute = kernel32.SetConsoleTextAttribute + SetConsoleTextAttribute.argtypes = [HANDLE, WORD] + SetConsoleTextAttribute.restype = BOOL -SetConsoleOutputCP = kernel32.SetConsoleOutputCP -SetConsoleOutputCP.argtypes = [UINT] -SetConsoleOutputCP.restype = BOOL + 
SetConsoleCursorPosition = kernel32.SetConsoleCursorPosition + SetConsoleCursorPosition.argtypes = [HANDLE, COORD] + SetConsoleCursorPosition.restype = BOOL -GetConsoleCP = kernel32.GetConsoleCP -GetConsoleCP.argtypes = [] -GetConsoleCP.restype = UINT + ReadConsoleW = kernel32.ReadConsoleW + ReadConsoleW.argtypes = [ + HANDLE, LPVOID, DWORD, ctypes.POINTER(DWORD), LPVOID] + ReadConsoleW.restype = BOOL -SetConsoleCP = kernel32.SetConsoleCP -SetConsoleCP.argtypes = [UINT] -SetConsoleCP.restype = BOOL + MultiByteToWideChar = kernel32.MultiByteToWideChar + MultiByteToWideChar.argtypes = [ + UINT, DWORD, LPCSTR, ctypes.c_int, LPWSTR, ctypes.c_int] + MultiByteToWideChar.restype = ctypes.c_int -SetConsoleTextAttribute = kernel32.SetConsoleTextAttribute -SetConsoleTextAttribute.argtypes = [HANDLE, WORD] -SetConsoleTextAttribute.restype = BOOL + WideCharToMultiByte = kernel32.WideCharToMultiByte + WideCharToMultiByte.argtypes = [ + UINT, DWORD, LPCWSTR, ctypes.c_int, LPSTR, ctypes.c_int, + LPCSTR, LPBOOL] + WideCharToMultiByte.restype = ctypes.c_int -SetConsoleCursorPosition = kernel32.SetConsoleCursorPosition -SetConsoleCursorPosition.argtypes = [HANDLE, COORD] -SetConsoleCursorPosition.restype = BOOL + MoveFileW = kernel32.MoveFileW + MoveFileW.argtypes = [LPCTSTR, LPCTSTR] + MoveFileW.restype = BOOL -ReadConsoleW = kernel32.ReadConsoleW -ReadConsoleW.argtypes = [HANDLE, LPVOID, DWORD, ctypes.POINTER(DWORD), LPVOID] -ReadConsoleW.restype = BOOL + GetFileInformationByHandleEx = None + if hasattr(kernel32, "GetFileInformationByHandleEx"): + GetFileInformationByHandleEx = kernel32.GetFileInformationByHandleEx + GetFileInformationByHandleEx.argtypes = [ + HANDLE, ctypes.c_int, ctypes.c_void_p, DWORD] + GetFileInformationByHandleEx.restype = BOOL + else: + # Windows XP + pass -MultiByteToWideChar = kernel32.MultiByteToWideChar -MultiByteToWideChar.argtypes = [ - UINT, DWORD, LPCSTR, ctypes.c_int, LPWSTR, ctypes.c_int] -MultiByteToWideChar.restype = ctypes.c_int + MAX_PATH = 
260 + FileNameInfo = 2 -WideCharToMultiByte = kernel32.WideCharToMultiByte -WideCharToMultiByte.argtypes = [ - UINT, DWORD, LPCWSTR, ctypes.c_int, LPSTR, ctypes.c_int, LPCSTR, LPBOOL] -WideCharToMultiByte.restpye = ctypes.c_int + class FILE_NAME_INFO(ctypes.Structure): + _fields_ = [ + ("FileNameLength", DWORD), + ("FileName", WCHAR), + ] -MoveFileW = kernel32.MoveFileW -MoveFileW.argtypes = [LPCTSTR, LPCTSTR] -MoveFileW.restype = BOOL + _get_osfhandle = msvcrt._get_osfhandle + _get_osfhandle.argtypes = [ctypes.c_int] + _get_osfhandle.restype = HANDLE + + GetFileType = kernel32.GetFileType + GetFileType.argtypes = [HANDLE] + GetFileType.restype = DWORD + + FILE_TYPE_PIPE = 0x0003 diff --git a/lib/mutagen/_senf/py.typed b/lib/mutagen/_senf/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/mutagen/_tags.py b/lib/mutagen/_tags.py old mode 100755 new mode 100644 index c3f2ebf6..b64caa9a --- a/lib/mutagen/_tags.py +++ b/lib/mutagen/_tags.py @@ -115,7 +115,7 @@ class Metadata(Tags): raise NotImplementedError @loadfile(writable=False) - def save(self, filething, **kwargs): + def save(self, filething=None, **kwargs): """save(filething=None, **kwargs) Save changes to a file. @@ -129,7 +129,7 @@ class Metadata(Tags): raise NotImplementedError @loadfile(writable=False) - def delete(self, filething): + def delete(self, filething=None): """delete(filething=None) Remove tags from a file. 
diff --git a/lib/mutagen/_tools/__init__.py b/lib/mutagen/_tools/__init__.py old mode 100755 new mode 100644 diff --git a/lib/mutagen/_tools/_util.py b/lib/mutagen/_tools/_util.py old mode 100755 new mode 100644 index 4e050769..a31de654 --- a/lib/mutagen/_tools/_util.py +++ b/lib/mutagen/_tools/_util.py @@ -12,7 +12,7 @@ import contextlib import optparse from mutagen._senf import print_ -from mutagen._compat import text_type, iterbytes +from mutagen._util import iterbytes def split_escape(string, sep, maxsplit=None, escape_char="\\"): @@ -25,7 +25,7 @@ def split_escape(string, sep, maxsplit=None, escape_char="\\"): assert len(escape_char) == 1 if isinstance(string, bytes): - if isinstance(escape_char, text_type): + if isinstance(escape_char, str): escape_char = escape_char.encode("ascii") iter_ = iterbytes else: diff --git a/lib/mutagen/_tools/mid3cp.py b/lib/mutagen/_tools/mid3cp.py old mode 100755 new mode 100644 index 4d4a5c44..995c7040 --- a/lib/mutagen/_tools/mid3cp.py +++ b/lib/mutagen/_tools/mid3cp.py @@ -16,7 +16,6 @@ import os.path import mutagen import mutagen.id3 from mutagen._senf import print_, argv -from mutagen._compat import text_type from ._util import SignalHandler, OptionParser @@ -52,14 +51,14 @@ def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): try: id3 = mutagen.id3.ID3(src, translate=False) except mutagen.id3.ID3NoHeaderError: - print_("No ID3 header found in ", src, file=sys.stderr) + print_(u"No ID3 header found in ", src, file=sys.stderr) return 1 except Exception as err: print_(str(err), file=sys.stderr) return 1 if verbose: - print_("File", src, "contains:", file=sys.stderr) + print_(u"File", src, u"contains:", file=sys.stderr) print_(id3.pprint(), file=sys.stderr) for tag in excluded_tags: @@ -75,7 +74,7 @@ def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): print_(str(err), file=sys.stderr) return 1 else: - for frame in list(id3.values()): + for frame in id3.values(): 
target.add(frame) id3 = target @@ -91,12 +90,12 @@ def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): try: id3.save(dst, v1=(2 if write_v1 else 0), v2_version=v2_version) except Exception as err: - print_("Error saving", dst, ":\n%s" % text_type(err), + print_(u"Error saving", dst, u":\n%s" % str(err), file=sys.stderr) return 1 else: if verbose: - print_("Successfully saved", dst, file=sys.stderr) + print_(u"Successfully saved", dst, file=sys.stderr) return 0 @@ -120,12 +119,12 @@ def main(argv): (src, dst) = args if not os.path.isfile(src): - print_("File not found:", src, file=sys.stderr) + print_(u"File not found:", src, file=sys.stderr) parser.print_help(file=sys.stderr) return 1 if not os.path.isfile(dst): - printerr("File not found:", dst, file=sys.stderr) + printerr(u"File not found:", dst, file=sys.stderr) parser.print_help(file=sys.stderr) return 1 diff --git a/lib/mutagen/_tools/mid3iconv.py b/lib/mutagen/_tools/mid3iconv.py old mode 100755 new mode 100644 index 5faa3ba0..5cc42f40 --- a/lib/mutagen/_tools/mid3iconv.py +++ b/lib/mutagen/_tools/mid3iconv.py @@ -16,7 +16,6 @@ import locale import mutagen import mutagen.id3 from mutagen._senf import argv, print_, fsnative -from mutagen._compat import text_type from ._util import SignalHandler, OptionParser @@ -75,7 +74,7 @@ def update(options, filenames): for filename in filenames: with _sig.block(): if verbose != "quiet": - print_("Updating", filename) + print_(u"Updating", filename) if has_id3v1(filename) and not noupdate and force_v1: mutagen.id3.delete(filename, False, True) @@ -84,13 +83,13 @@ def update(options, filenames): id3 = mutagen.id3.ID3(filename) except mutagen.id3.ID3NoHeaderError: if verbose != "quiet": - print_("No ID3 header found; skipping...") + print_(u"No ID3 header found; skipping...") continue except Exception as err: - print_(text_type(err), file=sys.stderr) + print_(str(err), file=sys.stderr) continue - for tag in [t for t in id3 if t.startswith(("T", 
"COMM"))]: + for tag in filter(lambda t: t.startswith(("T", "COMM")), id3): frame = id3[tag] if isinstance(frame, mutagen.id3.TimeStampTextFrame): # non-unicode fields @@ -105,7 +104,7 @@ def update(options, filenames): continue else: frame.text = text - if not text or min(list(map(isascii, text))): + if not text or min(map(isascii, text)): frame.encoding = 3 else: frame.encoding = 1 @@ -122,7 +121,7 @@ def update(options, filenames): def has_id3v1(filename): try: - with open(filename, 'rb+') as f: + with open(filename, 'rb') as f: f.seek(-128, 2) return f.read(3) == b"TAG" except IOError: @@ -154,9 +153,9 @@ def main(argv): for i, arg in enumerate(argv): if arg == "-v1": - argv[i] = fsnative("--force-v1") + argv[i] = fsnative(u"--force-v1") elif arg == "-removev1": - argv[i] = fsnative("--remove-v1") + argv[i] = fsnative(u"--remove-v1") (options, args) = parser.parse_args(argv[1:]) diff --git a/lib/mutagen/_tools/mid3v2.py b/lib/mutagen/_tools/mid3v2.py old mode 100755 new mode 100644 index 4d813536..db78514c --- a/lib/mutagen/_tools/mid3v2.py +++ b/lib/mutagen/_tools/mid3v2.py @@ -11,6 +11,7 @@ import sys import codecs import mimetypes +import warnings from optparse import SUPPRESS_HELP @@ -19,7 +20,6 @@ import mutagen.id3 from mutagen.id3 import Encoding, PictureType from mutagen._senf import fsnative, print_, argv, fsn2text, fsn2bytes, \ bytes2fsn -from mutagen._compat import PY2, text_type from ._util import split_escape, SignalHandler, OptionParser @@ -55,23 +55,23 @@ Any editing operation will cause the ID3 tag to be upgraded to ID3v2.4. 
def list_frames(option, opt, value, parser): - items = list(mutagen.id3.Frames.items()) + items = mutagen.id3.Frames.items() for name, frame in sorted(items): - print_(" --%s %s" % (name, frame.__doc__.split("\n")[0])) + print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) raise SystemExit def list_frames_2_2(option, opt, value, parser): - items = list(mutagen.id3.Frames_2_2.items()) + items = mutagen.id3.Frames_2_2.items() items.sort() for name, frame in items: - print_(" --%s %s" % (name, frame.__doc__.split("\n")[0])) + print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) raise SystemExit def list_genres(option, opt, value, parser): for i, genre in enumerate(mutagen.id3.TCON.GENRES): - print_("%3d: %s" % (i, genre)) + print_(u"%3d: %s" % (i, genre)) raise SystemExit @@ -79,7 +79,7 @@ def delete_tags(filenames, v1, v2): for filename in filenames: with _sig.block(): if verbose: - print_("deleting ID3 tag info in", filename, file=sys.stderr) + print_(u"deleting ID3 tag info in", filename, file=sys.stderr) mutagen.id3.delete(filename, v1, v2) @@ -88,22 +88,22 @@ def delete_frames(deletes, filenames): try: deletes = frame_from_fsnative(deletes) except ValueError as err: - print_(text_type(err), file=sys.stderr) + print_(str(err), file=sys.stderr) frames = deletes.split(",") for filename in filenames: with _sig.block(): if verbose: - print_("deleting %s from" % deletes, filename, + print_(u"deleting %s from" % deletes, filename, file=sys.stderr) try: id3 = mutagen.id3.ID3(filename) except mutagen.id3.ID3NoHeaderError: if verbose: - print_("No ID3 header found; skipping.", file=sys.stderr) + print_(u"No ID3 header found; skipping.", file=sys.stderr) except Exception as err: - print_(text_type(err), file=sys.stderr) + print_(str(err), file=sys.stderr) raise SystemExit(1) else: for frame in frames: @@ -119,26 +119,24 @@ def frame_from_fsnative(arg): assert isinstance(arg, fsnative) text = fsn2text(arg, strict=True) - if PY2: - return text.encode("ascii") - 
else: - return text.encode("ascii").decode("ascii") + return text.encode("ascii").decode("ascii") def value_from_fsnative(arg, escape): - """Takes an item from argv and returns a text_type value without + """Takes an item from argv and returns a str value without surrogate escapes or raises ValueError. """ assert isinstance(arg, fsnative) if escape: - bytes_ = fsn2bytes(arg, "utf-8") - if PY2: - bytes_ = bytes_.decode("string_escape") - else: + bytes_ = fsn2bytes(arg) + # With py3.7 this has started to warn for invalid escapes, but we + # don't control the input so ignore it. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") bytes_ = codecs.escape_decode(bytes_)[0] - arg = bytes2fsn(bytes_, "utf-8") + arg = bytes2fsn(bytes_) text = fsn2text(arg, strict=True) return text @@ -167,7 +165,7 @@ def write_files(edits, filenames, escape): try: frame = frame_from_fsnative(frame) except ValueError as err: - print_(text_type(err), file=sys.stderr) + print_(str(err), file=sys.stderr) assert isinstance(frame, str) @@ -177,9 +175,9 @@ def write_files(edits, filenames, escape): try: value = value_from_fsnative(value, escape) except ValueError as err: - error("%s: %s" % (frame, text_type(err))) + error(u"%s: %s" % (frame, str(err))) - assert isinstance(value, text_type) + assert isinstance(value, str) encoded_edits.append((frame, value)) edits = encoded_edits @@ -205,18 +203,18 @@ def write_files(edits, filenames, escape): for filename in filenames: with _sig.block(): if verbose: - print_("Writing", filename, file=sys.stderr) + print_(u"Writing", filename, file=sys.stderr) try: id3 = mutagen.id3.ID3(filename) except mutagen.id3.ID3NoHeaderError: if verbose: - print_("No ID3 header found; creating a new tag", + print_(u"No ID3 header found; creating a new tag", file=sys.stderr) id3 = mutagen.id3.ID3() except Exception as err: print_(str(err), file=sys.stderr) continue - for (frame, vlist) in list(edits.items()): + for (frame, vlist) in edits.items(): if frame == 
"POPM": for value in vlist: values = string_split(value, ":") @@ -240,13 +238,13 @@ def write_files(edits, filenames, escape): if len(values) >= 2: desc = values[1] else: - desc = "cover" + desc = u"cover" if len(values) >= 3: try: picture_type = int(values[2]) except ValueError: - error("Invalid picture type: %r" % values[1]) + error(u"Invalid picture type: %r" % values[1]) else: picture_type = PictureType.COVER_FRONT @@ -264,7 +262,7 @@ def write_files(edits, filenames, escape): with open(fn, "rb") as h: data = h.read() except IOError as e: - error(text_type(e)) + error(str(e)) frame = mutagen.id3.APIC(encoding=encoding, mime=mime, desc=desc, type=picture_type, data=data) @@ -283,11 +281,24 @@ def write_files(edits, filenames, escape): frame = mutagen.id3.COMM( encoding=3, text=value, lang=lang, desc=desc) id3.add(frame) + elif frame == "USLT": + for value in vlist: + values = string_split(value, ":") + if len(values) == 1: + value, desc, lang = values[0], "", "eng" + elif len(values) == 2: + desc, value, lang = values[0], values[1], "eng" + else: + value = ":".join(values[1:-1]) + desc, lang = values[0], values[-1] + frame = mutagen.id3.USLT( + encoding=3, text=value, lang=lang, desc=desc) + id3.add(frame) elif frame == "UFID": for value in vlist: values = string_split(value, ":") if len(values) != 2: - error("Invalid value: %r" % values) + error(u"Invalid value: %r" % values) owner = values[0] data = values[1].encode("utf-8") frame = mutagen.id3.UFID(owner=owner, data=data) @@ -302,9 +313,20 @@ def write_files(edits, filenames, escape): frame = mutagen.id3.TXXX( encoding=3, text=value, desc=desc) id3.add(frame) + elif frame == "WXXX": + for value in vlist: + values = string_split(value, ":", 1) + if len(values) == 1: + desc, value = "", values[0] + else: + desc, value = values[0], values[1] + frame = mutagen.id3.WXXX( + encoding=3, url=value, desc=desc) + id3.add(frame) elif issubclass(mutagen.id3.Frames[frame], mutagen.id3.UrlFrame): - frame = 
mutagen.id3.Frames[frame](encoding=3, url=vlist) + frame = mutagen.id3.Frames[frame]( + encoding=3, url=vlist[-1]) id3.add(frame) else: frame = mutagen.id3.Frames[frame](encoding=3, text=vlist) @@ -318,9 +340,9 @@ def list_tags(filenames): try: id3 = mutagen.id3.ID3(filename, translate=False) except mutagen.id3.ID3NoHeaderError: - print_("No ID3 header found; skipping.") + print_(u"No ID3 header found; skipping.") except Exception as err: - print_(text_type(err), file=sys.stderr) + print_(str(err), file=sys.stderr) raise SystemExit(1) else: print_(id3.pprint()) @@ -332,13 +354,13 @@ def list_tags_raw(filenames): try: id3 = mutagen.id3.ID3(filename, translate=False) except mutagen.id3.ID3NoHeaderError: - print_("No ID3 header found; skipping.") + print_(u"No ID3 header found; skipping.") except Exception as err: - print_(text_type(err), file=sys.stderr) + print_(str(err), file=sys.stderr) raise SystemExit(1) else: - for frame in list(id3.values()): - print_(text_type(repr(frame))) + for frame in id3.values(): + print_(str(repr(frame))) def main(argv): @@ -387,50 +409,51 @@ def main(argv): parser.add_option( "-a", "--artist", metavar='"ARTIST"', action="callback", help="Set the artist information", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--TPE1"), + callback=lambda *args: args[3].edits.append((fsnative(u"--TPE1"), args[2]))) parser.add_option( "-A", "--album", metavar='"ALBUM"', action="callback", help="Set the album title information", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--TALB"), + callback=lambda *args: args[3].edits.append((fsnative(u"--TALB"), args[2]))) parser.add_option( "-t", "--song", metavar='"SONG"', action="callback", help="Set the song title information", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--TIT2"), + callback=lambda *args: args[3].edits.append((fsnative(u"--TIT2"), args[2]))) parser.add_option( "-c", "--comment", 
metavar='"DESCRIPTION":"COMMENT":"LANGUAGE"', action="callback", help="Set the comment information", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--COMM"), + callback=lambda *args: args[3].edits.append((fsnative(u"--COMM"), args[2]))) parser.add_option( "-p", "--picture", metavar='"FILENAME":"DESCRIPTION":"IMAGE-TYPE":"MIME-TYPE"', action="callback", help="Set the picture", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--APIC"), + callback=lambda *args: args[3].edits.append((fsnative(u"--APIC"), args[2]))) parser.add_option( "-g", "--genre", metavar='"GENRE"', action="callback", help="Set the genre or genre number", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--TCON"), + callback=lambda *args: args[3].edits.append((fsnative(u"--TCON"), args[2]))) parser.add_option( "-y", "--year", "--date", metavar='YYYY[-MM-DD]', action="callback", help="Set the year/date", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--TDRC"), + callback=lambda *args: args[3].edits.append((fsnative(u"--TDRC"), args[2]))) parser.add_option( "-T", "--track", metavar='"num/num"', action="callback", help="Set the track number/(optional) total tracks", type="string", - callback=lambda *args: args[3].edits.append((fsnative("--TRCK"), + callback=lambda *args: args[3].edits.append((fsnative(u"--TRCK"), args[2]))) - for key, frame in list(mutagen.id3.Frames.items()): + for key, frame in mutagen.id3.Frames.items(): if (issubclass(frame, mutagen.id3.TextFrame) or issubclass(frame, mutagen.id3.UrlFrame) or issubclass(frame, mutagen.id3.POPM) - or frame in (mutagen.id3.APIC, mutagen.id3.UFID)): + or frame in (mutagen.id3.APIC, mutagen.id3.UFID, + mutagen.id3.USLT)): parser.add_option( "--" + key, action="callback", help=SUPPRESS_HELP, type='string', metavar="value", # optparse blows up with this diff --git a/lib/mutagen/_tools/moggsplit.py b/lib/mutagen/_tools/moggsplit.py old mode 100755 new mode 
100644 index 8246e132..710f0dfe --- a/lib/mutagen/_tools/moggsplit.py +++ b/lib/mutagen/_tools/moggsplit.py @@ -46,28 +46,28 @@ def main(argv): with _sig.block(): fileobjs = {} format["base"] = os.path.splitext(os.path.basename(filename))[0] - fileobj = open(filename, "rb") - if options.m3u: - m3u = open(format["base"] + ".m3u", "w") - fileobjs["m3u"] = m3u - else: - m3u = None - while True: - try: - page = OggPage(fileobj) - except EOFError: - break + with open(filename, "rb") as fileobj: + if options.m3u: + m3u = open(format["base"] + ".m3u", "w") + fileobjs["m3u"] = m3u else: - format["stream"] = page.serial - if page.serial not in fileobjs: - new_filename = options.pattern % format - new_fileobj = open(new_filename, "wb") - fileobjs[page.serial] = new_fileobj - if m3u: - m3u.write(new_filename + "\r\n") - fileobjs[page.serial].write(page.write()) - for f in list(fileobjs.values()): - f.close() + m3u = None + while True: + try: + page = OggPage(fileobj) + except EOFError: + break + else: + format["stream"] = page.serial + if page.serial not in fileobjs: + new_filename = options.pattern % format + new_fileobj = open(new_filename, "wb") + fileobjs[page.serial] = new_fileobj + if m3u: + m3u.write(new_filename + "\r\n") + fileobjs[page.serial].write(page.write()) + for f in fileobjs.values(): + f.close() def entry_point(): diff --git a/lib/mutagen/_tools/mutagen_inspect.py b/lib/mutagen/_tools/mutagen_inspect.py old mode 100755 new mode 100644 index c16d38cf..0ab15f2a --- a/lib/mutagen/_tools/mutagen_inspect.py +++ b/lib/mutagen/_tools/mutagen_inspect.py @@ -9,7 +9,6 @@ """Full tag list for any given file.""" from mutagen._senf import print_, argv -from mutagen._compat import text_type from ._util import SignalHandler, OptionParser @@ -30,14 +29,14 @@ def main(argv): raise SystemExit(parser.print_help() or 1) for filename in args: - print_("--", filename) + print_(u"--", filename) try: - print_("-", File(filename).pprint()) + print_(u"-", File(filename).pprint()) 
except AttributeError: - print_("- Unknown file type") + print_(u"- Unknown file type") except Exception as err: - print_(text_type(err)) - print_("") + print_(str(err)) + print_(u"") def entry_point(): diff --git a/lib/mutagen/_tools/mutagen_pony.py b/lib/mutagen/_tools/mutagen_pony.py old mode 100755 new mode 100644 index 7db4b668..e4a496c7 --- a/lib/mutagen/_tools/mutagen_pony.py +++ b/lib/mutagen/_tools/mutagen_pony.py @@ -83,7 +83,7 @@ def check_dir(path): from mutagen.mp3 import MP3 rep = Report(path) - print_("Scanning", path) + print_(u"Scanning", path) for path, dirs, files in os.walk(path): files.sort() for fn in files: @@ -105,7 +105,7 @@ def check_dir(path): def main(argv): if len(argv) == 1: - print_("Usage:", argv[0], "directory ...") + print_(u"Usage:", argv[0], u"directory ...") else: for path in argv[1:]: check_dir(path) diff --git a/lib/mutagen/_util.py b/lib/mutagen/_util.py old mode 100755 new mode 100644 index 887d4fa2..7dbb57d9 --- a/lib/mutagen/_util.py +++ b/lib/mutagen/_util.py @@ -16,21 +16,48 @@ import sys import struct import codecs import errno - -try: - import mmap -except ImportError: - # Google App Engine has no mmap: - # https://github.com/quodlibet/mutagen/issues/286 - mmap = None +import decimal +from io import BytesIO from collections import namedtuple from contextlib import contextmanager from functools import wraps from fnmatch import fnmatchcase -from ._compat import chr_, PY2, iteritems, iterbytes, integer_types, xrange, \ - izip, text_type, reraise + +_DEFAULT_BUFFER_SIZE = 2 ** 18 + + +def endswith(text, end): + # usefull for paths which can be both, str and bytes + if isinstance(text, str): + if not isinstance(end, str): + end = end.decode("ascii") + else: + if not isinstance(end, bytes): + end = end.encode("ascii") + return text.endswith(end) + + +def reraise(tp, value, tb): + raise tp(value).with_traceback(tb) + + +def bchr(x): + return bytes([x]) + + +def iterbytes(b): + return (bytes([v]) for v in b) + + +def 
intround(value): + """Given a float returns a rounded int. Should give the same result on + both Py2/3 + """ + + return int(decimal.Decimal.from_float( + value).to_integral_value(decimal.ROUND_HALF_EVEN)) def is_fileobj(fileobj): @@ -39,8 +66,8 @@ def is_fileobj(fileobj): file object """ - # open() only handles str/bytes, so we can be strict - return not isinstance(fileobj, (text_type, bytes)) + return not (isinstance(fileobj, (str, bytes)) or + hasattr(fileobj, "__fspath__")) def verify_fileobj(fileobj, writable=False): @@ -93,9 +120,9 @@ def fileobj_name(fileobj): path type, but might be empty or non-existent. """ - value = getattr(fileobj, "name", "") - if not isinstance(value, (text_type, bytes)): - value = text_type(value) + value = getattr(fileobj, "name", u"") + if not isinstance(value, (str, bytes)): + value = str(value) return value @@ -199,6 +226,10 @@ def _openfile(instance, filething, filename, fileobj, writable, create): if filething is not None: if is_fileobj(filething): fileobj = filething + elif hasattr(filething, "__fspath__"): + filename = filething.__fspath__() + if not isinstance(filename, (bytes, str)): + raise TypeError("expected __fspath__() to return a filename") else: filename = filething @@ -214,10 +245,24 @@ def _openfile(instance, filething, filename, fileobj, writable, create): yield FileThing(fileobj, filename, filename or fileobj_name(fileobj)) elif filename is not None: verify_filename(filename) + + inmemory_fileobj = False try: fileobj = open(filename, "rb+" if writable else "rb") except IOError as e: - if create and e.errno == errno.ENOENT: + if writable and e.errno == errno.EOPNOTSUPP: + # Some file systems (gvfs over fuse) don't support opening + # files read/write. To make things still work read the whole + # file into an in-memory file like object and write it back + # later. 
+ # https://github.com/quodlibet/mutagen/issues/300 + try: + with open(filename, "rb") as fileobj: + fileobj = BytesIO(fileobj.read()) + except IOError as e2: + raise MutagenError(e2) + inmemory_fileobj = True + elif create and e.errno == errno.ENOENT: assert writable try: fileobj = open(filename, "wb+") @@ -228,6 +273,15 @@ def _openfile(instance, filething, filename, fileobj, writable, create): with fileobj as fileobj: yield FileThing(fileobj, filename, filename) + + if inmemory_fileobj: + assert writable + data = fileobj.getvalue() + try: + with open(filename, "wb") as fileobj: + fileobj.write(data) + except IOError as e: + raise MutagenError(e) else: raise TypeError("Missing filename or fileobj argument") @@ -264,9 +318,6 @@ def hashable(cls): Needs a working __eq__ and __hash__ and will add a __ne__. """ - # py2 - assert "__hash__" in cls.__dict__ - # py3 assert cls.__dict__["__hash__"] is not None assert "__eq__" in cls.__dict__ @@ -302,8 +353,8 @@ def enum(cls): new_type.__module__ = cls.__module__ map_ = {} - for key, value in iteritems(d): - if key.upper() == key and isinstance(value, integer_types): + for key, value in d.items(): + if key.upper() == key and isinstance(value, int): value_instance = new_type(value) setattr(new_type, key, value_instance) map_[value] = key @@ -351,8 +402,8 @@ def flags(cls): new_type.__module__ = cls.__module__ map_ = {} - for key, value in iteritems(d): - if key.upper() == key and isinstance(value, integer_types): + for key, value in d.items(): + if key.upper() == key and isinstance(value, int): value_instance = new_type(value) setattr(new_type, key, value_instance) map_[value] = key @@ -360,12 +411,12 @@ def flags(cls): def str_(self): value = int(self) matches = [] - for k, v in list(map_.items()): + for k, v in map_.items(): if value & k: matches.append("%s.%s" % (type(self).__name__, v)) value &= ~k if value != 0 or not matches: - matches.append(text_type(value)) + matches.append(str(value)) return " | ".join(matches) @@ 
-395,7 +446,7 @@ class DictMixin(object): """ def __iter__(self): - return iter(list(self.keys())) + return iter(self.keys()) def __has_key(self, key): try: @@ -405,25 +456,13 @@ class DictMixin(object): else: return True - if PY2: - has_key = __has_key - __contains__ = __has_key - if PY2: - iterkeys = lambda self: iter(list(self.keys())) - def values(self): - return [self[k] for k in list(self.keys())] - - if PY2: - itervalues = lambda self: iter(list(self.values())) + return [self[k] for k in self.keys()] def items(self): - return list(zip(list(self.keys()), list(self.values()))) - - if PY2: - iteritems = lambda s: iter(list(s.items())) + return list(zip(self.keys(), self.values())) def clear(self): for key in list(self.keys()): @@ -443,7 +482,7 @@ class DictMixin(object): return value def popitem(self): - for key in list(self.keys()): + for key in self.keys(): break else: raise KeyError("dictionary is empty") @@ -455,7 +494,7 @@ class DictMixin(object): other = {} try: - for key, value in list(other.items()): + for key, value in other.items(): self.__setitem__(key, value) except AttributeError: for key, value in other: @@ -475,18 +514,18 @@ class DictMixin(object): return default def __repr__(self): - return repr(dict(list(self.items()))) + return repr(dict(self.items())) def __eq__(self, other): - return dict(list(self.items())) == other + return dict(self.items()) == other def __lt__(self, other): - return dict(list(self.items())) < other + return dict(self.items()) < other __hash__ = object.__hash__ def __len__(self): - return len(list(self.keys())) + return len(self.keys()) class DictProxy(DictMixin): @@ -504,7 +543,7 @@ class DictProxy(DictMixin): del(self.__dict[key]) def keys(self): - return list(self.__dict.keys()) + return self.__dict.keys() def _fill_cdata(cls): @@ -553,7 +592,7 @@ def _fill_cdata(cls): funcs["to_%s%s%s" % (prefix, name, esuffix)] = pack funcs["to_%sint%s%s" % (prefix, bits, esuffix)] = pack - for key, func in iteritems(funcs): + for 
key, func in funcs.items(): setattr(cls, key, staticmethod(func)) @@ -564,11 +603,10 @@ class cdata(object): uint32_le(data)/to_uint32_le(num)/uint32_le_from(data, offset=0) """ - from struct import error - error = error + error = struct.error bitswap = b''.join( - chr_(sum(((val >> i) & 1) << (7 - i) for i in range(8))) + bchr(sum(((val >> i) & 1) << (7 - i) for i in range(8))) for val in range(256)) test_bit = staticmethod(lambda value, n: bool((value >> n) & 1)) @@ -598,7 +636,7 @@ def get_size(fileobj): def read_full(fileobj, size): - """Like fileobj.read but raises IOError if no all requested data is + """Like fileobj.read but raises IOError if not all requested data is returned. If you want to distinguish IOError and the EOS case, better handle @@ -645,65 +683,7 @@ def seek_end(fileobj, offset): fileobj.seek(-offset, 2) -def mmap_move(fileobj, dest, src, count): - """Mmaps the file object if possible and moves 'count' data - from 'src' to 'dest'. All data has to be inside the file size - (enlarging the file through this function isn't possible) - - Will adjust the file offset. 
- - Args: - fileobj (fileobj) - dest (int): The destination offset - src (int): The source offset - count (int) The amount of data to move - Raises: - mmap.error: In case move failed - IOError: In case an operation on the fileobj fails - ValueError: In case invalid parameters were given - """ - - assert mmap is not None, "no mmap support" - - if dest < 0 or src < 0 or count < 0: - raise ValueError("Invalid parameters") - - try: - fileno = fileobj.fileno() - except (AttributeError, IOError): - raise mmap.error( - "File object does not expose/support a file descriptor") - - fileobj.seek(0, 2) - filesize = fileobj.tell() - length = max(dest, src) + count - - if length > filesize: - raise ValueError("Not in file size boundary") - - offset = ((min(dest, src) // mmap.ALLOCATIONGRANULARITY) * - mmap.ALLOCATIONGRANULARITY) - assert dest >= offset - assert src >= offset - assert offset % mmap.ALLOCATIONGRANULARITY == 0 - - # Windows doesn't handle empty mappings, add a fast path here instead - if count == 0: - return - - # fast path - if src == dest: - return - - fileobj.flush() - file_map = mmap.mmap(fileno, length - offset, offset=offset) - try: - file_map.move(dest - offset, src - offset, count) - finally: - file_map.close() - - -def resize_file(fobj, diff, BUFFER_SIZE=2 ** 16): +def resize_file(fobj, diff, BUFFER_SIZE=_DEFAULT_BUFFER_SIZE): """Resize a file by `diff`. New space will be filled with zeros. @@ -740,7 +720,7 @@ def resize_file(fobj, diff, BUFFER_SIZE=2 ** 16): raise -def fallback_move(fobj, dest, src, count, BUFFER_SIZE=2 ** 16): +def move_bytes(fobj, dest, src, count, BUFFER_SIZE=_DEFAULT_BUFFER_SIZE): """Moves data around using read()/write(). Args: @@ -783,12 +763,11 @@ def fallback_move(fobj, dest, src, count, BUFFER_SIZE=2 ** 16): fobj.flush() -def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): +def insert_bytes(fobj, size, offset, BUFFER_SIZE=_DEFAULT_BUFFER_SIZE): """Insert size bytes of empty space starting at offset. 
fobj must be an open file object, open rb+ or - equivalent. Mutagen tries to use mmap to resize the file, but - falls back to a significantly slower method if mmap fails. + equivalent. Args: fobj (fileobj) @@ -809,22 +788,14 @@ def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): raise ValueError resize_file(fobj, size, BUFFER_SIZE) - - if mmap is not None: - try: - mmap_move(fobj, offset + size, offset, movesize) - except mmap.error: - fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE) - else: - fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE) + move_bytes(fobj, offset + size, offset, movesize, BUFFER_SIZE) -def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): +def delete_bytes(fobj, size, offset, BUFFER_SIZE=_DEFAULT_BUFFER_SIZE): """Delete size bytes of empty space starting at offset. fobj must be an open file object, open rb+ or - equivalent. Mutagen tries to use mmap to resize the file, but - falls back to a significantly slower method if mmap fails. + equivalent. 
Args: fobj (fileobj) @@ -844,14 +815,7 @@ def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16): if movesize < 0: raise ValueError - if mmap is not None: - try: - mmap_move(fobj, offset, offset + size, movesize) - except mmap.error: - fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE) - else: - fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE) - + move_bytes(fobj, offset, offset + size, movesize, BUFFER_SIZE) resize_file(fobj, -size, BUFFER_SIZE) @@ -895,7 +859,7 @@ def dict_match(d, key, default=None): if key in d and "[" not in key: return d[key] else: - for pattern, value in iteritems(d): + for pattern, value in d.items(): if fnmatchcase(key, pattern): return value return default @@ -976,15 +940,15 @@ def decode_terminated(data, encoding, strict=True): r = [] for i, b in enumerate(iterbytes(data)): c = decoder.decode(b) - if c == "\x00": - return "".join(r), data[i + 1:] + if c == u"\x00": + return u"".join(r), data[i + 1:] r.append(c) else: # make sure the decoder is finished r.append(decoder.decode(b"", True)) if strict: raise ValueError("not null terminated") - return "".join(r), b"" + return u"".join(r), b"" class BitReaderError(Exception): diff --git a/lib/mutagen/_vorbis.py b/lib/mutagen/_vorbis.py old mode 100755 new mode 100644 index ec2fc8b7..f71144d3 --- a/lib/mutagen/_vorbis.py +++ b/lib/mutagen/_vorbis.py @@ -17,10 +17,10 @@ The specification is at http://www.xiph.org/vorbis/doc/v-comment.html. 
""" import sys +from io import BytesIO import mutagen -from ._compat import reraise, BytesIO, text_type, xrange, PY3, PY2 -from mutagen._util import DictMixin, cdata, MutagenError +from mutagen._util import DictMixin, cdata, MutagenError, reraise def is_valid_key(key): @@ -32,7 +32,7 @@ def is_valid_key(key): Takes str/unicode in Python 2, unicode in Python 3 """ - if PY3 and isinstance(key, bytes): + if isinstance(key, bytes): raise TypeError("needs to be str not bytes") for c in key: @@ -71,7 +71,7 @@ class VComment(mutagen.Tags, list): vendor (text): the stream 'vendor' (i.e. writer); default 'Mutagen' """ - vendor = "Mutagen " + mutagen.version_string + vendor = u"Mutagen " + mutagen.version_string def __init__(self, data=None, *args, **kwargs): self._size = 0 @@ -116,7 +116,7 @@ class VComment(mutagen.Tags, list): if errors == "ignore": continue elif errors == "replace": - tag, value = "unknown%d" % i, string + tag, value = u"unknown%d" % i, string else: reraise(VorbisEncodingError, err, sys.exc_info()[2]) try: @@ -124,9 +124,7 @@ class VComment(mutagen.Tags, list): except UnicodeEncodeError: raise VorbisEncodingError("invalid tag name %r" % tag) else: - # string keys in py3k - if PY3: - tag = tag.decode("ascii") + tag = tag.decode("ascii") if is_valid_key(tag): self.append((tag, value)) @@ -145,30 +143,19 @@ class VComment(mutagen.Tags, list): In Python 3 all keys and values have to be a string. 
""" - if not isinstance(self.vendor, text_type): - if PY3: - raise ValueError("vendor needs to be str") - - try: - self.vendor.decode('utf-8') - except UnicodeDecodeError: - raise ValueError + if not isinstance(self.vendor, str): + raise ValueError("vendor needs to be str") for key, value in self: try: if not is_valid_key(key): - raise ValueError + raise ValueError("%r is not a valid key" % key) except TypeError: raise ValueError("%r is not a valid key" % key) - if not isinstance(value, text_type): - if PY3: - raise ValueError("%r needs to be str" % key) - - try: - value.decode("utf-8") - except: - raise ValueError("%r is not a valid value" % value) + if not isinstance(value, str): + err = "%r needs to be str for key %r" % (value, key) + raise ValueError(err) return True @@ -213,12 +200,12 @@ class VComment(mutagen.Tags, list): def pprint(self): def _decode(value): - if not isinstance(value, text_type): + if not isinstance(value, str): return value.decode('utf-8', 'replace') return value - tags = ["%s=%s" % (_decode(k), _decode(v)) for k, v in self] - return "\n".join(tags) + tags = [u"%s=%s" % (_decode(k), _decode(v)) for k, v in self] + return u"\n".join(tags) class VCommentDict(VComment, DictMixin): @@ -242,7 +229,6 @@ class VCommentDict(VComment, DictMixin): work. """ - # PY3 only if isinstance(key, slice): return VComment.__getitem__(self, key) @@ -260,7 +246,6 @@ class VCommentDict(VComment, DictMixin): def __delitem__(self, key): """Delete all values associated with the key.""" - # PY3 only if isinstance(key, slice): return VComment.__delitem__(self, key) @@ -296,7 +281,6 @@ class VCommentDict(VComment, DictMixin): string. 
""" - # PY3 only if isinstance(key, slice): return VComment.__setitem__(self, key, values) @@ -310,9 +294,6 @@ class VCommentDict(VComment, DictMixin): except KeyError: pass - if PY2: - key = key.encode('ascii') - for value in values: self.append((key, value)) @@ -324,4 +305,4 @@ class VCommentDict(VComment, DictMixin): def as_dict(self): """Return a copy of the comment data in a real dict.""" - return dict([(key, self[key]) for key in list(self.keys())]) + return dict([(key, self[key]) for key in self.keys()]) diff --git a/lib/mutagen/aac.py b/lib/mutagen/aac.py old mode 100755 new mode 100644 index ef220180..30162d03 --- a/lib/mutagen/aac.py +++ b/lib/mutagen/aac.py @@ -15,9 +15,8 @@ from mutagen import StreamInfo from mutagen._file import FileType from mutagen._util import BitReader, BitReaderError, MutagenError, loadfile, \ - convert_error + convert_error, endswith from mutagen.id3._util import BitPaddedInt -from mutagen._compat import endswith, xrange _FREQS = [ @@ -375,10 +374,13 @@ class AACInfo(StreamInfo): fileobj.seek(0, 2) stream_size = fileobj.tell() - (offset + s.offset) # approx - self.length = float(s.samples * stream_size) / (s.size * s.frequency) + self.length = 0.0 + if s.frequency != 0: + self.length = \ + float(s.samples * stream_size) / (s.size * s.frequency) def pprint(self): - return "AAC (%s), %d Hz, %.2f seconds, %d channel(s), %d bps" % ( + return u"AAC (%s), %d Hz, %.2f seconds, %d channel(s), %d bps" % ( self._type, self.sample_rate, self.length, self.channels, self.bitrate) diff --git a/lib/mutagen/ac3.py b/lib/mutagen/ac3.py new file mode 100644 index 00000000..467434e5 --- /dev/null +++ b/lib/mutagen/ac3.py @@ -0,0 +1,330 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2019 Philipp Wolfer +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later 
version. + + +"""Pure AC3 file information. +""" + +__all__ = ["AC3", "Open"] + +from mutagen import StreamInfo +from mutagen._file import FileType +from mutagen._util import ( + BitReader, + BitReaderError, + MutagenError, + convert_error, + enum, + loadfile, + endswith, +) + + +@enum +class ChannelMode(object): + DUALMONO = 0 + MONO = 1 + STEREO = 2 + C3F = 3 + C2F1R = 4 + C3F1R = 5 + C2F2R = 6 + C3F2R = 7 + + +AC3_CHANNELS = { + ChannelMode.DUALMONO: 2, + ChannelMode.MONO: 1, + ChannelMode.STEREO: 2, + ChannelMode.C3F: 3, + ChannelMode.C2F1R: 3, + ChannelMode.C3F1R: 4, + ChannelMode.C2F2R: 4, + ChannelMode.C3F2R: 5 +} + +AC3_HEADER_SIZE = 7 + +AC3_SAMPLE_RATES = [48000, 44100, 32000] + +AC3_BITRATES = [ + 32, 40, 48, 56, 64, 80, 96, 112, 128, + 160, 192, 224, 256, 320, 384, 448, 512, 576, 640 +] + + +@enum +class EAC3FrameType(object): + INDEPENDENT = 0 + DEPENDENT = 1 + AC3_CONVERT = 2 + RESERVED = 3 + + +EAC3_BLOCKS = [1, 2, 3, 6] + + +class AC3Error(MutagenError): + pass + + +class AC3Info(StreamInfo): + + """AC3 stream information. + The length of the stream is just a guess and might not be correct. 
+ + Attributes: + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bitrate (`int`): audio bitrate, in bits per second + codec (`str`): ac-3 or ec-3 (Enhanced AC-3) + """ + + channels = 0 + length = 0 + sample_rate = 0 + bitrate = 0 + codec = 'ac-3' + + @convert_error(IOError, AC3Error) + def __init__(self, fileobj): + """Raises AC3Error""" + header = bytearray(fileobj.read(6)) + + if len(header) < 6: + raise AC3Error("not enough data") + + if not header.startswith(b"\x0b\x77"): + raise AC3Error("not a AC3 file") + + bitstream_id = header[5] >> 3 + if bitstream_id > 16: + raise AC3Error("invalid bitstream_id %i" % bitstream_id) + + fileobj.seek(2) + self._read_header(fileobj, bitstream_id) + + def _read_header(self, fileobj, bitstream_id): + bitreader = BitReader(fileobj) + try: + # This is partially based on code from + # https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/ac3_parser.c + if bitstream_id <= 10: # Normal AC-3 + self._read_header_normal(bitreader, bitstream_id) + else: # Enhanced AC-3 + self._read_header_enhanced(bitreader) + except BitReaderError as e: + raise AC3Error(e) + + self.length = self._guess_length(fileobj) + + def _read_header_normal(self, bitreader, bitstream_id): + r = bitreader + r.skip(16) # 16 bit CRC + sr_code = r.bits(2) + if sr_code == 3: + raise AC3Error("invalid sample rate code %i" % sr_code) + + frame_size_code = r.bits(6) + if frame_size_code > 37: + raise AC3Error("invalid frame size code %i" % frame_size_code) + + r.skip(5) # bitstream ID, already read + r.skip(3) # bitstream mode, not needed + channel_mode = ChannelMode(r.bits(3)) + r.skip(2) # dolby surround mode or surround mix level + lfe_on = r.bits(1) + + sr_shift = max(bitstream_id, 8) - 8 + try: + self.sample_rate = AC3_SAMPLE_RATES[sr_code] >> sr_shift + self.bitrate = (AC3_BITRATES[frame_size_code >> 1] * 1000 + ) >> sr_shift + except KeyError as e: + raise 
AC3Error(e) + self.channels = self._get_channels(channel_mode, lfe_on) + self._skip_unused_header_bits_normal(r, channel_mode) + + def _read_header_enhanced(self, bitreader): + r = bitreader + self.codec = "ec-3" + frame_type = r.bits(2) + if frame_type == EAC3FrameType.RESERVED: + raise AC3Error("invalid frame type %i" % frame_type) + + r.skip(3) # substream ID, not needed + + frame_size = (r.bits(11) + 1) << 1 + if frame_size < AC3_HEADER_SIZE: + raise AC3Error("invalid frame size %i" % frame_size) + + sr_code = r.bits(2) + try: + if sr_code == 3: + sr_code2 = r.bits(2) + if sr_code2 == 3: + raise AC3Error("invalid sample rate code %i" % sr_code2) + + numblocks_code = 3 + self.sample_rate = AC3_SAMPLE_RATES[sr_code2] // 2 + else: + numblocks_code = r.bits(2) + self.sample_rate = AC3_SAMPLE_RATES[sr_code] + + channel_mode = ChannelMode(r.bits(3)) + lfe_on = r.bits(1) + self.bitrate = 8 * frame_size * self.sample_rate // ( + EAC3_BLOCKS[numblocks_code] * 256) + except KeyError as e: + raise AC3Error(e) + r.skip(5) # bitstream ID, already read + self.channels = self._get_channels(channel_mode, lfe_on) + self._skip_unused_header_bits_enhanced( + r, frame_type, channel_mode, sr_code, numblocks_code) + + @staticmethod + def _skip_unused_header_bits_normal(bitreader, channel_mode): + r = bitreader + r.skip(5) # Dialogue Normalization + if r.bits(1): # Compression Gain Word Exists + r.skip(8) # Compression Gain Word + if r.bits(1): # Language Code Exists + r.skip(8) # Language Code + if r.bits(1): # Audio Production Information Exists + # Mixing Level, 5 Bits + # Room Type, 2 Bits + r.skip(7) + if channel_mode == ChannelMode.DUALMONO: + r.skip(5) # Dialogue Normalization, ch2 + if r.bits(1): # Compression Gain Word Exists, ch2 + r.skip(8) # Compression Gain Word, ch2 + if r.bits(1): # Language Code Exists, ch2 + r.skip(8) # Language Code, ch2 + if r.bits(1): # Audio Production Information Exists, ch2 + # Mixing Level, ch2, 5 Bits + # Room Type, ch2, 2 Bits + r.skip(7) + 
# Copyright Bit, 1 Bit + # Original Bit Stream, 1 Bit + r.skip(2) + timecod1e = r.bits(1) # Time Code First Halve Exists + timecod2e = r.bits(1) # Time Code Second Halve Exists + if timecod1e: + r.skip(14) # Time Code First Half + if timecod2e: + r.skip(14) # Time Code Second Half + if r.bits(1): # Additional Bit Stream Information Exists + addbsil = r.bit(6) # Additional Bit Stream Information Length + r.skip((addbsil + 1) * 8) + + @staticmethod + def _skip_unused_header_bits_enhanced(bitreader, frame_type, channel_mode, + sr_code, numblocks_code): + r = bitreader + r.skip(5) # Dialogue Normalization + if r.bits(1): # Compression Gain Word Exists + r.skip(8) # Compression Gain Word + if channel_mode == ChannelMode.DUALMONO: + r.skip(5) # Dialogue Normalization, ch2 + if r.bits(1): # Compression Gain Word Exists, ch2 + r.skip(8) # Compression Gain Word, ch2 + if frame_type == EAC3FrameType.DEPENDENT: + if r.bits(1): # chanmap exists + r.skip(16) # chanmap + if r.bits(1): # mixmdate, 1 Bit + # FIXME: Handle channel dependent fields + return + if r.bits(1): # Informational Metadata Exists + # bsmod, 3 Bits + # Copyright Bit, 1 Bit + # Original Bit Stream, 1 Bit + r.skip(5) + if channel_mode == ChannelMode.STEREO: + # dsurmod. 
2 Bits + # dheadphonmod, 2 Bits + r.skip(4) + elif channel_mode >= ChannelMode.C2F2R: + r.skip(2) # dsurexmod + if r.bits(1): # Audio Production Information Exists + # Mixing Level, 5 Bits + # Room Type, 2 Bits + # adconvtyp, 1 Bit + r.skip(8) + if channel_mode == ChannelMode.DUALMONO: + if r.bits(1): # Audio Production Information Exists, ch2 + # Mixing Level, ch2, 5 Bits + # Room Type, ch2, 2 Bits + # adconvtyp, ch2, 1 Bit + r.skip(8) + if sr_code < 3: # if not half sample rate + r.skip(1) # sourcefscod + if frame_type == EAC3FrameType.INDEPENDENT and numblocks_code == 3: + r.skip(1) # convsync + if frame_type == EAC3FrameType.AC3_CONVERT: + if numblocks_code != 3: + if r.bits(1): # blkid + r.skip(6) # frmsizecod + if r.bits(1): # Additional Bit Stream Information Exists + addbsil = r.bit(6) # Additional Bit Stream Information Length + r.skip((addbsil + 1) * 8) + + @staticmethod + def _get_channels(channel_mode, lfe_on): + try: + return AC3_CHANNELS[channel_mode] + lfe_on + except KeyError as e: + raise AC3Error(e) + + def _guess_length(self, fileobj): + # use bitrate + data size to guess length + if self.bitrate == 0: + return + start = fileobj.tell() + fileobj.seek(0, 2) + length = fileobj.tell() - start + return 8.0 * length / self.bitrate + + def pprint(self): + return u"%s, %d Hz, %.2f seconds, %d channel(s), %d bps" % ( + self.codec, self.sample_rate, self.length, self.channels, + self.bitrate) + + +class AC3(FileType): + """AC3(filething) + + Arguments: + filething (filething) + + Load AC3 or EAC3 files. + + Tagging is not supported. + Use the ID3/APEv2 classes directly instead. 
+ + Attributes: + info (`AC3Info`) + """ + + _mimes = ["audio/ac3"] + + @loadfile() + def load(self, filething): + self.info = AC3Info(filething.fileobj) + + def add_tags(self): + raise AC3Error("doesn't support tags") + + @staticmethod + def score(filename, fileobj, header): + return header.startswith(b"\x0b\x77") * 2 \ + + (endswith(filename, ".ac3") or endswith(filename, ".eac3")) + + +Open = AC3 +error = AC3Error diff --git a/lib/mutagen/aiff.py b/lib/mutagen/aiff.py old mode 100755 new mode 100644 index 814c042e..7c0015d1 --- a/lib/mutagen/aiff.py +++ b/lib/mutagen/aiff.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2014 Evan Purkhiser # 2014 Ben Ockmore +# 2019-2020 Philipp Wolfer # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -9,26 +10,30 @@ """AIFF audio stream information and tags.""" -import sys import struct from struct import pack -from ._compat import endswith, text_type, reraise from mutagen import StreamInfo, FileType -from mutagen.id3 import ID3 from mutagen.id3._util import ID3NoHeaderError, error as ID3Error -from mutagen._util import resize_bytes, delete_bytes, MutagenError, loadfile, \ - convert_error +from mutagen._iff import ( + IffChunk, + IffContainerChunkMixin, + IffFile, + IffID3, + InvalidChunk, + error as IffError, +) +from mutagen._util import ( + convert_error, + loadfile, + endswith, +) __all__ = ["AIFF", "Open", "delete"] -class error(MutagenError): - pass - - -class InvalidChunk(error): +class error(IffError): pass @@ -36,14 +41,10 @@ class InvalidChunk(error): _HUGE_VAL = 1.79769313486231e+308 -def is_valid_chunk_id(id): - assert isinstance(id, text_type) +def read_float(data): + """Raises OverflowError""" - return ((len(id) <= 4) and (min(id) >= ' ') and - (max(id) <= '~')) - - -def read_float(data): # 10 bytes + assert len(data) == 10 expon, himant, lomant = struct.unpack('>hLL', data) sign = 1 if expon < 0: @@ -52,168 
+53,70 @@ def read_float(data): # 10 bytes if expon == himant == lomant == 0: f = 0.0 elif expon == 0x7FFF: - f = _HUGE_VAL + raise OverflowError("inf and nan not supported") else: expon = expon - 16383 + # this can raise OverflowError too f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) return sign * f -class IFFChunk(object): +class AIFFChunk(IffChunk): """Representation of a single IFF chunk""" - # Chunk headers are 8 bytes long (4 for ID and 4 for the size) - HEADER_SIZE = 8 + @classmethod + def parse_header(cls, header): + return struct.unpack('>4sI', header) - def __init__(self, fileobj, parent_chunk=None): - self.__fileobj = fileobj - self.parent_chunk = parent_chunk - self.offset = fileobj.tell() + @classmethod + def get_class(cls, id): + if id == 'FORM': + return AIFFFormChunk + else: + return cls - header = fileobj.read(self.HEADER_SIZE) - if len(header) < self.HEADER_SIZE: - raise InvalidChunk() + def write_new_header(self, id_, size): + self._fileobj.write(pack('>4sI', id_, size)) - self.id, self.data_size = struct.unpack('>4si', header) - - try: - self.id = self.id.decode('ascii') - except UnicodeDecodeError: - raise InvalidChunk() - - if not is_valid_chunk_id(self.id): - raise InvalidChunk() - - self.size = self.HEADER_SIZE + self.data_size - self.data_offset = fileobj.tell() - - def read(self): - """Read the chunks data""" - - self.__fileobj.seek(self.data_offset) - return self.__fileobj.read(self.data_size) - - def write(self, data): - """Write the chunk data""" - - if len(data) > self.data_size: - raise ValueError - - self.__fileobj.seek(self.data_offset) - self.__fileobj.write(data) - - def delete(self): - """Removes the chunk from the file""" - - delete_bytes(self.__fileobj, self.size, self.offset) - if self.parent_chunk is not None: - self.parent_chunk._update_size( - self.parent_chunk.data_size - self.size) - - def _update_size(self, data_size): - """Update the size of the chunk""" - - self.__fileobj.seek(self.offset + 4) - 
self.__fileobj.write(pack('>I', data_size)) - if self.parent_chunk is not None: - size_diff = self.data_size - data_size - self.parent_chunk._update_size( - self.parent_chunk.data_size - size_diff) - self.data_size = data_size - self.size = data_size + self.HEADER_SIZE - - def resize(self, new_data_size): - """Resize the file and update the chunk sizes""" - - resize_bytes( - self.__fileobj, self.data_size, new_data_size, self.data_offset) - self._update_size(new_data_size) + def write_size(self): + self._fileobj.write(pack('>I', self.data_size)) -class IFFFile(object): - """Representation of a IFF file""" +class AIFFFormChunk(AIFFChunk, IffContainerChunkMixin): + """The AIFF root chunk.""" + + def parse_next_subchunk(self): + return AIFFChunk.parse(self._fileobj, self) + + def __init__(self, fileobj, id, data_size, parent_chunk): + if id != u'FORM': + raise InvalidChunk('Expected FORM chunk, got %s' % id) + + AIFFChunk.__init__(self, fileobj, id, data_size, parent_chunk) + self.init_container() + + +class AIFFFile(IffFile): + """Representation of a AIFF file""" def __init__(self, fileobj): - self.__fileobj = fileobj - self.__chunks = {} - # AIFF Files always start with the FORM chunk which contains a 4 byte # ID before the start of other chunks - fileobj.seek(0) - self.__chunks['FORM'] = IFFChunk(fileobj) + super().__init__(AIFFChunk, fileobj) - # Skip past the 4 byte FORM id - fileobj.seek(IFFChunk.HEADER_SIZE + 4) - - # Where the next chunk can be located. 
We need to keep track of this - # since the size indicated in the FORM header may not match up with the - # offset determined from the size of the last chunk in the file - self.__next_offset = fileobj.tell() - - # Load all of the chunks - while True: - try: - chunk = IFFChunk(fileobj, self['FORM']) - except InvalidChunk: - break - self.__chunks[chunk.id.strip()] = chunk - - # Calculate the location of the next chunk, - # considering the pad byte - self.__next_offset = chunk.offset + chunk.size - self.__next_offset += self.__next_offset % 2 - fileobj.seek(self.__next_offset) + if self.root.id != u'FORM': + raise InvalidChunk("Root chunk must be a FORM chunk, got %s" + % self.root.id) def __contains__(self, id_): - """Check if the IFF file contains a specific chunk""" - - assert isinstance(id_, text_type) - - if not is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") - - return id_ in self.__chunks + if id_ == 'FORM': # For backwards compatibility + return True + return super().__contains__(id_) def __getitem__(self, id_): - """Get a chunk from the IFF file""" - - assert isinstance(id_, text_type) - - if not is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") - - try: - return self.__chunks[id_] - except KeyError: - raise KeyError( - "%r has no %r chunk" % (self.__fileobj, id_)) - - def __delitem__(self, id_): - """Remove a chunk from the IFF file""" - - assert isinstance(id_, text_type) - - if not is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") - - self.__chunks.pop(id_).delete() - - def insert_chunk(self, id_): - """Insert a new chunk at the end of the IFF file""" - - assert isinstance(id_, text_type) - - if not is_valid_chunk_id(id_): - raise KeyError("AIFF key must be four ASCII characters.") - - self.__fileobj.seek(self.__next_offset) - self.__fileobj.write(pack('>4si', id_.ljust(4).encode('ascii'), 0)) - self.__fileobj.seek(self.__next_offset) - chunk = 
IFFChunk(self.__fileobj, self['FORM']) - self['FORM']._update_size(self['FORM'].data_size + chunk.size) - - self.__chunks[id_] = chunk - self.__next_offset = chunk.offset + chunk.size + if id_ == 'FORM': # For backwards compatibility + return self.root + return super().__getitem__(id_) class AIFFInfo(StreamInfo): @@ -228,7 +131,7 @@ class AIFFInfo(StreamInfo): bitrate (`int`): audio bitrate, in bits per second channels (`int`): The number of audio channels sample_rate (`int`): audio sample rate, in Hz - sample_size (`int`): The audio sample size + bits_per_sample (`int`): The audio sample size """ length = 0 @@ -240,9 +143,9 @@ class AIFFInfo(StreamInfo): def __init__(self, fileobj): """Raises error""" - iff = IFFFile(fileobj) + iff = AIFFFile(fileobj) try: - common_chunk = iff['COMM'] + common_chunk = iff[u'COMM'] except KeyError as e: raise error(str(e)) @@ -253,61 +156,30 @@ class AIFFInfo(StreamInfo): info = struct.unpack('>hLh10s', data[:18]) channels, frame_count, sample_size, sample_rate = info - self.sample_rate = int(read_float(sample_rate)) - self.sample_size = sample_size + try: + self.sample_rate = int(read_float(sample_rate)) + except OverflowError: + raise error("Invalid sample rate") + if self.sample_rate < 0: + raise error("Invalid sample rate") + if self.sample_rate != 0: + self.length = frame_count / float(self.sample_rate) + + self.bits_per_sample = sample_size + self.sample_size = sample_size # For backward compatibility self.channels = channels self.bitrate = channels * sample_size * self.sample_rate - self.length = frame_count / float(self.sample_rate) def pprint(self): - return "%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % ( + return u"%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % ( self.channels, self.bitrate, self.sample_rate, self.length) -class _IFFID3(ID3): +class _IFFID3(IffID3): """A AIFF file with ID3v2 tags""" - def _pre_load_header(self, fileobj): - try: - fileobj.seek(IFFFile(fileobj)['ID3'].data_offset) - except 
(InvalidChunk, KeyError): - raise ID3NoHeaderError("No ID3 chunk") - - @convert_error(IOError, error) - @loadfile(writable=True) - def save(self, filething, v2_version=4, v23_sep='/', padding=None): - """Save ID3v2 data to the AIFF file""" - - fileobj = filething.fileobj - - iff_file = IFFFile(fileobj) - - if 'ID3' not in iff_file: - iff_file.insert_chunk('ID3') - - chunk = iff_file['ID3'] - - try: - data = self._prepare_data( - fileobj, chunk.data_offset, chunk.data_size, v2_version, - v23_sep, padding) - except ID3Error as e: - reraise(error, e, sys.exc_info()[2]) - - new_size = len(data) - new_size += new_size % 2 # pad byte - assert new_size % 2 == 0 - chunk.resize(new_size) - data += (new_size - len(data)) * b'\x00' - assert new_size == len(data) - chunk.write(data) - - @loadfile(writable=True) - def delete(self, filething): - """Completely removes the ID3 chunk from the AIFF file""" - - delete(filething) - self.clear() + def _load_file(self, fileobj): + return AIFFFile(fileobj) @convert_error(IOError, error) @@ -316,7 +188,7 @@ def delete(filething): """Completely removes the ID3 chunk from the AIFF file""" try: - del IFFFile(filething.fileobj)['ID3'] + del AIFFFile(filething.fileobj)[u'ID3'] except KeyError: pass diff --git a/lib/mutagen/apev2.py b/lib/mutagen/apev2.py old mode 100755 new mode 100644 index 1456c12c..13ca256d --- a/lib/mutagen/apev2.py +++ b/lib/mutagen/apev2.py @@ -32,29 +32,22 @@ __all__ = ["APEv2", "APEv2File", "Open", "delete"] import sys import struct +from io import BytesIO from collections.abc import MutableSequence -from ._compat import (cBytesIO, PY3, text_type, PY2, reraise, swap_to_string, - xrange) from mutagen import Metadata, FileType, StreamInfo from mutagen._util import DictMixin, cdata, delete_bytes, total_ordering, \ - MutagenError, loadfile, convert_error, seek_end, get_size + MutagenError, loadfile, convert_error, seek_end, get_size, reraise def is_valid_apev2_key(key): - if not isinstance(key, text_type): - if PY3: - 
raise TypeError("APEv2 key must be str") - - try: - key = key.decode('ascii') - except UnicodeDecodeError: - return False + if not isinstance(key, str): + raise TypeError("APEv2 key must be str") # PY26 - Change to set literal syntax (since set is faster than list here) - return ((2 <= len(key) <= 255) and (min(key) >= ' ') and - (max(key) <= '~') and - (key not in ["OggS", "TAG", "ID3", "MP+"])) + return ((2 <= len(key) <= 255) and (min(key) >= u' ') and + (max(key) <= u'~') and + (key not in [u"OggS", u"TAG", u"ID3", u"MP+"])) # There are three different kinds of APE tag values. # "0: Item contains text information coded in UTF-8 @@ -263,7 +256,7 @@ class _CIDictProxy(DictMixin): del(self.__dict[lower]) def keys(self): - return [self.__casemap.get(key, key) for key in list(self.__dict.keys())] + return [self.__casemap.get(key, key) for key in self.__dict.keys()] class APEv2(_CIDictProxy, Metadata): @@ -280,7 +273,7 @@ class APEv2(_CIDictProxy, Metadata): """Return tag key=value pairs in a human-readable format.""" items = sorted(self.items()) - return "\n".join("%s=%s" % (k, v.pprint()) for k, v in items) + return u"\n".join(u"%s=%s" % (k, v.pprint()) for k, v in items) @convert_error(IOError, error) @loadfile() @@ -301,7 +294,7 @@ class APEv2(_CIDictProxy, Metadata): def __parse_tag(self, tag, count): """Raises IOError and APEBadItemError""" - fileobj = cBytesIO(tag) + fileobj = BytesIO(tag) for i in range(count): tag_data = fileobj.read(8) @@ -330,11 +323,10 @@ class APEv2(_CIDictProxy, Metadata): if key[-1:] == b"\x00": key = key[:-1] - if PY3: - try: - key = key.decode("ascii") - except UnicodeError as err: - reraise(APEBadItemError, err, sys.exc_info()[2]) + try: + key = key.decode("ascii") + except UnicodeError as err: + reraise(APEBadItemError, err, sys.exc_info()[2]) value = fileobj.read(size) if len(value) != size: raise APEBadItemError @@ -346,16 +338,12 @@ class APEv2(_CIDictProxy, Metadata): def __getitem__(self, key): if not is_valid_apev2_key(key): 
raise KeyError("%r is not a valid APEv2 key" % key) - if PY2: - key = key.encode('ascii') return super(APEv2, self).__getitem__(key) def __delitem__(self, key): if not is_valid_apev2_key(key): raise KeyError("%r is not a valid APEv2 key" % key) - if PY2: - key = key.encode('ascii') super(APEv2, self).__delitem__(key) @@ -383,43 +371,28 @@ class APEv2(_CIDictProxy, Metadata): if not is_valid_apev2_key(key): raise KeyError("%r is not a valid APEv2 key" % key) - if PY2: - key = key.encode('ascii') - if not isinstance(value, _APEValue): # let's guess at the content if we're not already a value... - if isinstance(value, text_type): + if isinstance(value, str): # unicode? we've got to be text. value = APEValue(value, TEXT) elif isinstance(value, list): items = [] for v in value: - if not isinstance(v, text_type): - if PY3: - raise TypeError("item in list not str") - v = v.decode("utf-8") + if not isinstance(v, str): + raise TypeError("item in list not str") items.append(v) # list? text. - value = APEValue("\0".join(items), TEXT) + value = APEValue(u"\0".join(items), TEXT) else: - if PY3: - value = APEValue(value, BINARY) - else: - try: - value.decode("utf-8") - except UnicodeError: - # invalid UTF8 text, probably binary - value = APEValue(value, BINARY) - else: - # valid UTF8, probably text - value = APEValue(value, TEXT) + value = APEValue(value, BINARY) super(APEv2, self).__setitem__(key, value) @convert_error(IOError, error) @loadfile(writable=True, create=True) - def save(self, filething): + def save(self, filething=None): """Save changes to a file. If no filename is given, the one most recently loaded is used. @@ -441,7 +414,7 @@ class APEv2(_CIDictProxy, Metadata): fileobj.seek(0, 2) tags = [] - for key, value in list(self.items()): + for key, value in self.items(): # Packed format for an item: # 4B: Value length # 4B: Value type @@ -459,7 +432,7 @@ class APEv2(_CIDictProxy, Metadata): # "APE tags items should be sorted ascending by size... 
This is # not a MUST, but STRONGLY recommended. Actually the items should # be sorted by importance/byte, but this is not feasible." - tags.sort(key=len) + tags.sort(key=lambda tag: (len(tag), tag)) num_tags = len(tags) tags = b"".join(tags) @@ -481,7 +454,7 @@ class APEv2(_CIDictProxy, Metadata): @convert_error(IOError, error) @loadfile(writable=True) - def delete(self, filething): + def delete(self, filething=None): """Remove tags from a file.""" fileobj = filething.fileobj @@ -578,7 +551,6 @@ class _APEValue(object): return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind) -@swap_to_string @total_ordering class _APEUtf8Value(_APEValue): @@ -589,11 +561,8 @@ class _APEUtf8Value(_APEValue): reraise(APEBadItemError, e, sys.exc_info()[2]) def _validate(self, value): - if not isinstance(value, text_type): - if PY3: - raise TypeError("value not str") - else: - value = value.decode("utf-8") + if not isinstance(value, str): + raise TypeError("value not str") return value def _write(self): @@ -627,46 +596,39 @@ class APETextValue(_APEUtf8Value, MutableSequence): def __iter__(self): """Iterate over the strings of the value (not the characters)""" - return iter(self.value.split("\0")) + return iter(self.value.split(u"\0")) def __getitem__(self, index): - return self.value.split("\0")[index] + return self.value.split(u"\0")[index] def __len__(self): - return self.value.count("\0") + 1 + return self.value.count(u"\0") + 1 def __setitem__(self, index, value): - if not isinstance(value, text_type): - if PY3: - raise TypeError("value not str") - else: - value = value.decode("utf-8") + if not isinstance(value, str): + raise TypeError("value not str") values = list(self) values[index] = value - self.value = "\0".join(values) + self.value = u"\0".join(values) def insert(self, index, value): - if not isinstance(value, text_type): - if PY3: - raise TypeError("value not str") - else: - value = value.decode("utf-8") + if not isinstance(value, str): + raise TypeError("value 
not str") values = list(self) values.insert(index, value) - self.value = "\0".join(values) + self.value = u"\0".join(values) def __delitem__(self, index): values = list(self) del values[index] - self.value = "\0".join(values) + self.value = u"\0".join(values) def pprint(self): - return " / ".join(self) + return u" / ".join(self) -@swap_to_string @total_ordering class APEBinaryValue(_APEValue): """An APEv2 binary value.""" @@ -697,7 +659,7 @@ class APEBinaryValue(_APEValue): return self.value < other def pprint(self): - return "[%d bytes]" % len(self) + return u"[%d bytes]" % len(self) class APEExtValue(_APEUtf8Value): @@ -709,7 +671,7 @@ class APEExtValue(_APEUtf8Value): kind = EXTERNAL def pprint(self): - return "[External] %s" % self.value + return u"[External] %s" % self.value class APEv2File(FileType): @@ -731,7 +693,7 @@ class APEv2File(FileType): @staticmethod def pprint(): - return "Unknown format with APEv2 tag." + return u"Unknown format with APEv2 tag." @loadfile() def load(self, filething): diff --git a/lib/mutagen/asf/__init__.py b/lib/mutagen/asf/__init__.py old mode 100755 new mode 100644 index bcee1be4..8b64e2a2 --- a/lib/mutagen/asf/__init__.py +++ b/lib/mutagen/asf/__init__.py @@ -13,7 +13,6 @@ __all__ = ["ASF", "Open"] from mutagen import FileType, Tags, StreamInfo from mutagen._util import resize_bytes, DictMixin, loadfile, convert_error -from mutagen._compat import string_types, long_, PY3, izip from ._util import error, ASFError, ASFHeaderError from ._objects import HeaderObject, MetadataLibraryObject, MetadataObject, \ @@ -24,7 +23,7 @@ from ._attrs import ASFGUIDAttribute, ASFWordAttribute, ASFQWordAttribute, \ ASFUnicodeAttribute, ASFBaseAttribute, ASFValue -# pyflakes +# flake8 error, ASFError, ASFHeaderError, ASFValue @@ -51,26 +50,26 @@ class ASFInfo(StreamInfo): sample_rate = 0 bitrate = 0 channels = 0 - codec_type = "" - codec_name = "" - codec_description = "" + codec_type = u"" + codec_name = u"" + codec_description = u"" def 
__init__(self): self.length = 0.0 self.sample_rate = 0 self.bitrate = 0 self.channels = 0 - self.codec_type = "" - self.codec_name = "" - self.codec_description = "" + self.codec_type = u"" + self.codec_name = u"" + self.codec_description = u"" def pprint(self): """Returns: text: a stream information text summary """ - s = "ASF (%s) %d bps, %s Hz, %d channels, %.2f seconds" % ( - self.codec_type or self.codec_name or "???", self.bitrate, + s = u"ASF (%s) %d bps, %s Hz, %d channels, %.2f seconds" % ( + self.codec_type or self.codec_name or u"???", self.bitrate, self.sample_rate, self.channels, self.length) return s @@ -89,7 +88,6 @@ class ASFTags(list, DictMixin, Tags): """ - # PY3 only if isinstance(key, slice): return list.__getitem__(self, key) @@ -102,7 +100,6 @@ class ASFTags(list, DictMixin, Tags): def __delitem__(self, key): """Delete all values associated with the key.""" - # PY3 only if isinstance(key, slice): return list.__delitem__(self, key) @@ -129,7 +126,6 @@ class ASFTags(list, DictMixin, Tags): string. 
""" - # PY3 only if isinstance(key, slice): return list.__setitem__(self, key, values) @@ -139,16 +135,14 @@ class ASFTags(list, DictMixin, Tags): to_append = [] for value in values: if not isinstance(value, ASFBaseAttribute): - if isinstance(value, string_types): + if isinstance(value, str): value = ASFUnicodeAttribute(value) - elif PY3 and isinstance(value, bytes): + elif isinstance(value, bytes): value = ASFByteArrayAttribute(value) elif isinstance(value, bool): value = ASFBoolAttribute(value) elif isinstance(value, int): value = ASFDWordAttribute(value) - elif isinstance(value, long_): - value = ASFQWordAttribute(value) else: raise TypeError("Invalid type %r" % type(value)) to_append.append((key, value)) @@ -252,14 +246,14 @@ class ASF(FileType): @convert_error(IOError, error) @loadfile(writable=True) - def save(self, filething, padding=None): + def save(self, filething=None, padding=None): """save(filething=None, padding=None) Save tag changes back to the loaded file. Args: filething (filething) - padding (PaddingFunction) + padding (:obj:`mutagen.PaddingFunction`) Raises: mutagen.MutagenError """ @@ -319,7 +313,7 @@ class ASF(FileType): raise ASFError @loadfile(writable=True) - def delete(self, filething): + def delete(self, filething=None): """delete(filething=None) Args: diff --git a/lib/mutagen/asf/_attrs.py b/lib/mutagen/asf/_attrs.py old mode 100755 new mode 100644 index d8f304af..823ea2fb --- a/lib/mutagen/asf/_attrs.py +++ b/lib/mutagen/asf/_attrs.py @@ -10,8 +10,7 @@ import sys import struct -from mutagen._compat import swap_to_string, text_type, PY2, reraise -from mutagen._util import total_ordering +from mutagen._util import total_ordering, reraise from ._util import ASFError @@ -36,7 +35,7 @@ class ASFBaseAttribute(object): stream=None, **kwargs): self.language = language self.stream = stream - if data: + if data is not None: self.value = self.parse(data, **kwargs) else: if value is None: @@ -103,7 +102,6 @@ class ASFBaseAttribute(object): 
@ASFBaseAttribute._register -@swap_to_string @total_ordering class ASFUnicodeAttribute(ASFBaseAttribute): """Unicode string attribute. @@ -122,11 +120,8 @@ class ASFUnicodeAttribute(ASFBaseAttribute): reraise(ASFError, e, sys.exc_info()[2]) def _validate(self, value): - if not isinstance(value, text_type): - if PY2: - return value.decode("utf-8") - else: - raise TypeError("%r not str" % value) + if not isinstance(value, str): + raise TypeError("%r not str" % value) return value def _render(self): @@ -142,16 +137,15 @@ class ASFUnicodeAttribute(ASFBaseAttribute): return self.value def __eq__(self, other): - return text_type(self) == other + return str(self) == other def __lt__(self, other): - return text_type(self) < other + return str(self) < other __hash__ = ASFBaseAttribute.__hash__ @ASFBaseAttribute._register -@swap_to_string @total_ordering class ASFByteArrayAttribute(ASFBaseAttribute): """Byte array attribute. @@ -194,7 +188,6 @@ class ASFByteArrayAttribute(ASFBaseAttribute): @ASFBaseAttribute._register -@swap_to_string @total_ordering class ASFBoolAttribute(ASFBaseAttribute): """Bool attribute. @@ -228,10 +221,10 @@ class ASFBoolAttribute(ASFBaseAttribute): return bool(self.value) def __bytes__(self): - return text_type(self.value).encode('utf-8') + return str(self.value).encode('utf-8') def __str__(self): - return text_type(self.value) + return str(self.value) def __eq__(self, other): return bool(self.value) == other @@ -243,7 +236,6 @@ class ASFBoolAttribute(ASFBaseAttribute): @ASFBaseAttribute._register -@swap_to_string @total_ordering class ASFDWordAttribute(ASFBaseAttribute): """DWORD attribute. 
@@ -274,10 +266,10 @@ class ASFDWordAttribute(ASFBaseAttribute): return self.value def __bytes__(self): - return text_type(self.value).encode('utf-8') + return str(self.value).encode('utf-8') def __str__(self): - return text_type(self.value) + return str(self.value) def __eq__(self, other): return int(self.value) == other @@ -289,7 +281,6 @@ class ASFDWordAttribute(ASFBaseAttribute): @ASFBaseAttribute._register -@swap_to_string @total_ordering class ASFQWordAttribute(ASFBaseAttribute): """QWORD attribute. @@ -320,10 +311,10 @@ class ASFQWordAttribute(ASFBaseAttribute): return self.value def __bytes__(self): - return text_type(self.value).encode('utf-8') + return str(self.value).encode('utf-8') def __str__(self): - return text_type(self.value) + return str(self.value) def __eq__(self, other): return int(self.value) == other @@ -335,7 +326,6 @@ class ASFQWordAttribute(ASFBaseAttribute): @ASFBaseAttribute._register -@swap_to_string @total_ordering class ASFWordAttribute(ASFBaseAttribute): """WORD attribute. 
@@ -366,10 +356,10 @@ class ASFWordAttribute(ASFBaseAttribute): return self.value def __bytes__(self): - return text_type(self.value).encode('utf-8') + return str(self.value).encode('utf-8') def __str__(self): - return text_type(self.value) + return str(self.value) def __eq__(self, other): return int(self.value) == other @@ -381,7 +371,6 @@ class ASFWordAttribute(ASFBaseAttribute): @ASFBaseAttribute._register -@swap_to_string @total_ordering class ASFGUIDAttribute(ASFBaseAttribute): """GUID attribute.""" diff --git a/lib/mutagen/asf/_objects.py b/lib/mutagen/asf/_objects.py old mode 100755 new mode 100644 index ccb1b69b..0560b116 --- a/lib/mutagen/asf/_objects.py +++ b/lib/mutagen/asf/_objects.py @@ -10,7 +10,6 @@ import struct from mutagen._util import cdata, get_size -from mutagen._compat import text_type, xrange, izip from mutagen._tags import PaddingInfo from ._util import guid2bytes, bytes2guid, CODECS, ASFError, ASFHeaderError @@ -108,13 +107,16 @@ class HeaderObject(BaseObject): try: data = fileobj.read(payload_size) - except OverflowError: + except (OverflowError, MemoryError): # read doesn't take 64bit values raise ASFHeaderError("invalid header size") if len(data) != payload_size: raise ASFHeaderError("truncated") - obj.parse(asf, data) + try: + obj.parse(asf, data) + except struct.error: + raise ASFHeaderError("truncated") header.objects.append(obj) return header @@ -151,7 +153,8 @@ class HeaderObject(BaseObject): # ask the user for padding adjustments file_size = get_size(fileobj) content_size = file_size - available - assert content_size >= 0 + if content_size < 0: + raise ASFHeaderError("truncated content") info = PaddingInfo(available - needed_size, content_size) # add padding @@ -180,11 +183,11 @@ class ContentDescriptionObject(BaseObject): GUID = guid2bytes("75B22633-668E-11CF-A6D9-00AA0062CE6C") NAMES = [ - "Title", - "Author", - "Copyright", - "Description", - "Rating", + u"Title", + u"Author", + u"Copyright", + u"Description", + u"Rating", ] def 
parse(self, asf, data): @@ -195,7 +198,7 @@ class ContentDescriptionObject(BaseObject): for length in lengths: end = pos + length if length > 0: - texts.append(data[pos:end].decode("utf-16-le").strip("\x00")) + texts.append(data[pos:end].decode("utf-16-le").strip(u"\x00")) else: texts.append(None) pos = end @@ -209,12 +212,12 @@ class ContentDescriptionObject(BaseObject): def render_text(name): value = asf.to_content_description.get(name) if value is not None: - return text_type(value).encode("utf-16-le") + b"\x00\x00" + return str(value).encode("utf-16-le") + b"\x00\x00" else: return b"" texts = [render_text(x) for x in self.NAMES] - data = struct.pack("= 0 asf.info.length = max((length / 10000000.0) - (preroll / 1000.0), 0.0) @@ -292,7 +297,7 @@ class CodecListObject(BaseObject): try: name = data[offset:next_offset].decode("utf-16-le").strip("\x00") except UnicodeDecodeError: - name = "" + name = u"" offset = next_offset units, offset = cdata.uint16_le_from(data, offset) @@ -300,12 +305,12 @@ class CodecListObject(BaseObject): try: desc = data[offset:next_offset].decode("utf-16-le").strip("\x00") except UnicodeDecodeError: - desc = "" + desc = u"" offset = next_offset bytes_, offset = cdata.uint16_le_from(data, offset) next_offset = offset + bytes_ - codec = "" + codec = u"" if bytes_ == 2: codec_id = cdata.uint16_le_from(data, offset)[0] if codec_id in CODECS: @@ -377,6 +382,8 @@ class HeaderExtensionObject(BaseObject): while datapos < datasize: guid, size = struct.unpack( "<16sQ", data[22 + datapos:22 + datapos + 24]) + if size < 1: + raise ASFHeaderError("invalid size in header extension") obj = BaseObject._get_object(guid) obj.parse(asf, data[22 + datapos + 24:22 + datapos + size]) self.objects.append(obj) @@ -423,7 +430,7 @@ class MetadataObject(BaseObject): asf._tags.setdefault(self.GUID, []).append((name, attr)) def render(self, asf): - attrs = list(asf.to_metadata.items()) + attrs = asf.to_metadata.items() data = b"".join([attr.render_m(name) for (name, 
attr) in attrs]) return (self.GUID + struct.pack("4sQ', header) + + @classmethod + def get_class(cls, id): + if id in DSDIFFListChunk.LIST_CHUNK_IDS: + return DSDIFFListChunk + elif id == 'DST': + return DSTChunk + else: + return cls + + def write_new_header(self, id_, size): + self._fileobj.write(struct.pack('>4sQ', id_, size)) + + def write_size(self): + self._fileobj.write(struct.pack('>Q', self.data_size)) + + +class DSDIFFListChunk(DSDIFFChunk, IffContainerChunkMixin): + """A DSDIFF chunk containing other chunks. + """ + + LIST_CHUNK_IDS = ['FRM8', 'PROP'] + + def parse_next_subchunk(self): + return DSDIFFChunk.parse(self._fileobj, self) + + def __init__(self, fileobj, id, data_size, parent_chunk): + if id not in self.LIST_CHUNK_IDS: + raise InvalidChunk('Not a list chunk: %s' % id) + + DSDIFFChunk.__init__(self, fileobj, id, data_size, parent_chunk) + self.init_container() + + +class DSTChunk(DSDIFFChunk, IffContainerChunkMixin): + """A DSDIFF chunk containing other chunks. + """ + + def parse_next_subchunk(self): + return DSDIFFChunk.parse(self._fileobj, self) + + def __init__(self, fileobj, id, data_size, parent_chunk): + if id != 'DST': + raise InvalidChunk('Not a DST chunk: %s' % id) + + DSDIFFChunk.__init__(self, fileobj, id, data_size, parent_chunk) + self.init_container(name_size=0) + + +class DSDIFFFile(IffFile): + """Representation of a DSDIFF file""" + + def __init__(self, fileobj): + super().__init__(DSDIFFChunk, fileobj) + + if self.root.id != u'FRM8': + raise InvalidChunk("Root chunk must be a FRM8 chunk, got %r" + % self.root) + + +class DSDIFFInfo(StreamInfo): + + """DSDIFF stream information. 
+ + Attributes: + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bits_per_sample (`int`): audio sample size (for DSD this is always 1) + bitrate (`int`): audio bitrate, in bits per second + compression (`str`): DSD (uncompressed) or DST + """ + + channels = 0 + length = 0 + sample_rate = 0 + bits_per_sample = 1 + bitrate = 0 + compression = None + + @convert_error(IOError, error) + def __init__(self, fileobj): + """Raises error""" + + iff = DSDIFFFile(fileobj) + try: + prop_chunk = iff['PROP'] + except KeyError as e: + raise error(str(e)) + + if prop_chunk.name == 'SND ': + for chunk in prop_chunk.subchunks(): + if chunk.id == 'FS' and chunk.data_size == 4: + data = chunk.read() + if len(data) < 4: + raise InvalidChunk("Not enough data in FS chunk") + self.sample_rate, = struct.unpack('>L', data[:4]) + elif chunk.id == 'CHNL' and chunk.data_size >= 2: + data = chunk.read() + if len(data) < 2: + raise InvalidChunk("Not enough data in CHNL chunk") + self.channels, = struct.unpack('>H', data[:2]) + elif chunk.id == 'CMPR' and chunk.data_size >= 4: + data = chunk.read() + if len(data) < 4: + raise InvalidChunk("Not enough data in CMPR chunk") + compression_id, = struct.unpack('>4s', data[:4]) + self.compression = compression_id.decode('ascii').rstrip() + + if self.sample_rate < 0: + raise error("Invalid sample rate") + + if self.compression == 'DSD': # not compressed + try: + dsd_chunk = iff['DSD'] + except KeyError as e: + raise error(str(e)) + + # DSD data has one bit per sample. Eight samples of a channel + # are clustered together for a channel byte. For multiple channels + # the channel bytes are interleaved (in the order specified in the + # CHNL chunk). See DSDIFF spec chapter 3.3. 
+ sample_count = dsd_chunk.data_size * 8 / (self.channels or 1) + + if self.sample_rate != 0: + self.length = sample_count / float(self.sample_rate) + + self.bitrate = (self.channels * self.bits_per_sample + * self.sample_rate) + elif self.compression == 'DST': + try: + dst_frame = iff['DST'] + dst_frame_info = dst_frame['FRTE'] + except KeyError as e: + raise error(str(e)) + + if dst_frame_info.data_size >= 6: + data = dst_frame_info.read() + if len(data) < 6: + raise InvalidChunk("Not enough data in FRTE chunk") + frame_count, frame_rate = struct.unpack('>LH', data[:6]) + if frame_rate: + self.length = frame_count / frame_rate + + if frame_count: + dst_data_size = dst_frame.data_size - dst_frame_info.size + avg_frame_size = dst_data_size / frame_count + self.bitrate = avg_frame_size * 8 * frame_rate + + def pprint(self): + return u"%d channel DSDIFF (%s) @ %d bps, %s Hz, %.2f seconds" % ( + self.channels, self.compression, self.bitrate, self.sample_rate, + self.length) + + +class _DSDIFFID3(IffID3): + """A DSDIFF file with ID3v2 tags""" + + def _load_file(self, fileobj): + return DSDIFFFile(fileobj) + + +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """Completely removes the ID3 chunk from the DSDIFF file""" + + try: + del DSDIFFFile(filething.fileobj)[u'ID3'] + except KeyError: + pass + + +class DSDIFF(FileType): + """DSDIFF(filething) + + An DSDIFF audio file. + + For tagging ID3v2 data is added to a chunk with the ID "ID3 ". 
+ + Arguments: + filething (filething) + + Attributes: + tags (`mutagen.id3.ID3`) + info (`DSDIFFInfo`) + """ + + _mimes = ["audio/x-dff"] + + @convert_error(IOError, error) + @loadfile() + def load(self, filething, **kwargs): + fileobj = filething.fileobj + + try: + self.tags = _DSDIFFID3(fileobj, **kwargs) + except ID3NoHeaderError: + self.tags = None + except ID3Error as e: + raise error(e) + else: + self.tags.filename = self.filename + + fileobj.seek(0, 0) + self.info = DSDIFFInfo(fileobj) + + def add_tags(self): + """Add empty ID3 tags to the file.""" + if self.tags is None: + self.tags = _DSDIFFID3() + else: + raise error("an ID3 tag already exists") + + @staticmethod + def score(filename, fileobj, header): + return header.startswith(b"FRM8") * 2 + endswith(filename, ".dff") + + +Open = DSDIFF diff --git a/lib/mutagen/dsf.py b/lib/mutagen/dsf.py old mode 100755 new mode 100644 index f0593e9f..8686efeb --- a/lib/mutagen/dsf.py +++ b/lib/mutagen/dsf.py @@ -11,11 +11,11 @@ import sys import struct - -from ._compat import cBytesIO, reraise, endswith +from io import BytesIO from mutagen import FileType, StreamInfo -from mutagen._util import cdata, MutagenError, loadfile, convert_error +from mutagen._util import cdata, MutagenError, loadfile, \ + convert_error, reraise, endswith from mutagen.id3 import ID3 from mutagen.id3._util import ID3NoHeaderError, error as ID3Error @@ -80,7 +80,7 @@ class DSDChunk(DSFChunk): self.offset_metdata_chunk = cdata.ulonglong_le(data[20:28]) def write(self): - f = cBytesIO() + f = BytesIO() f.write(self.chunk_header) f.write(struct.pack(" '\x7f': + if v and max(v) > u'\x7f': enc = 3 break @@ -173,7 +172,8 @@ class EasyID3(DictMixin, Metadata): lambda s, v: setattr(s.__id3, 'load', v)) @loadfile(writable=True, create=True) - def save(self, filething, v1=1, v2_version=4, v23_sep='/', padding=None): + def save(self, filething=None, v1=1, v2_version=4, v23_sep='/', + padding=None): """save(filething=None, v1=1, v2_version=4, v23_sep='/', 
padding=None) Save changes to a file. @@ -203,8 +203,9 @@ class EasyID3(DictMixin, Metadata): filename = property(lambda s: s.__id3.filename, lambda s, fn: setattr(s.__id3, 'filename', fn)) - size = property(lambda s: s.__id3.size, - lambda s, fn: setattr(s.__id3, 'size', s)) + @property + def size(self): + return self.__id3.size def __getitem__(self, key): func = dict_match(self.Get, key.lower(), self.GetFallback) @@ -214,12 +215,8 @@ class EasyID3(DictMixin, Metadata): raise EasyID3KeyError("%r is not a valid key" % key) def __setitem__(self, key, value): - if PY2: - if isinstance(value, str): - value = [value] - else: - if isinstance(value, text_type): - value = [value] + if isinstance(value, str): + value = [value] func = dict_match(self.Set, key.lower(), self.SetFallback) if func is not None: return func(self.__id3, key, value) @@ -235,7 +232,7 @@ class EasyID3(DictMixin, Metadata): def keys(self): keys = [] - for key in list(self.Get.keys()): + for key in self.Get.keys(): if key in self.List: keys.extend(self.List[key](self.__id3, key)) elif key in self: @@ -398,7 +395,7 @@ def gain_get(id3, key): except KeyError: raise EasyID3KeyError(key) else: - return ["%+f dB" % frame.gain] + return [u"%+f dB" % frame.gain] def gain_set(id3, key, value): @@ -432,7 +429,7 @@ def peak_get(id3, key): except KeyError: raise EasyID3KeyError(key) else: - return ["%f" % frame.peak] + return [u"%f" % frame.peak] def peak_set(id3, key, value): @@ -469,7 +466,7 @@ def peakgain_list(id3, key): keys.append("replaygain_%s_peak" % frame.desc) return keys -for frameid, key in iteritems({ +for frameid, key in { "TALB": "album", "TBPM": "bpm", "TCMP": "compilation", # iTunes extension @@ -498,7 +495,7 @@ for frameid, key in iteritems({ "TSRC": "isrc", "TSST": "discsubtitle", "TLAN": "language", -}): +}.items(): EasyID3.RegisterTextKey(key, frameid) EasyID3.RegisterKey("genre", genre_get, genre_set, genre_delete) @@ -519,28 +516,28 @@ EasyID3.RegisterKey("replaygain_*_peak", peak_get, 
peak_set, peak_delete) # http://musicbrainz.org/docs/specs/metadata_tags.html # http://bugs.musicbrainz.org/ticket/1383 # http://musicbrainz.org/doc/MusicBrainzTag -for desc, key in iteritems({ - "MusicBrainz Artist Id": "musicbrainz_artistid", - "MusicBrainz Album Id": "musicbrainz_albumid", - "MusicBrainz Album Artist Id": "musicbrainz_albumartistid", - "MusicBrainz TRM Id": "musicbrainz_trmid", - "MusicIP PUID": "musicip_puid", - "MusicMagic Fingerprint": "musicip_fingerprint", - "MusicBrainz Album Status": "musicbrainz_albumstatus", - "MusicBrainz Album Type": "musicbrainz_albumtype", - "MusicBrainz Album Release Country": "releasecountry", - "MusicBrainz Disc Id": "musicbrainz_discid", - "ASIN": "asin", - "ALBUMARTISTSORT": "albumartistsort", - "PERFORMER": "performer", - "BARCODE": "barcode", - "CATALOGNUMBER": "catalognumber", - "MusicBrainz Release Track Id": "musicbrainz_releasetrackid", - "MusicBrainz Release Group Id": "musicbrainz_releasegroupid", - "MusicBrainz Work Id": "musicbrainz_workid", - "Acoustid Fingerprint": "acoustid_fingerprint", - "Acoustid Id": "acoustid_id", -}): +for desc, key in { + u"MusicBrainz Artist Id": "musicbrainz_artistid", + u"MusicBrainz Album Id": "musicbrainz_albumid", + u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid", + u"MusicBrainz TRM Id": "musicbrainz_trmid", + u"MusicIP PUID": "musicip_puid", + u"MusicMagic Fingerprint": "musicip_fingerprint", + u"MusicBrainz Album Status": "musicbrainz_albumstatus", + u"MusicBrainz Album Type": "musicbrainz_albumtype", + u"MusicBrainz Album Release Country": "releasecountry", + u"MusicBrainz Disc Id": "musicbrainz_discid", + u"ASIN": "asin", + u"ALBUMARTISTSORT": "albumartistsort", + u"PERFORMER": "performer", + u"BARCODE": "barcode", + u"CATALOGNUMBER": "catalognumber", + u"MusicBrainz Release Track Id": "musicbrainz_releasetrackid", + u"MusicBrainz Release Group Id": "musicbrainz_releasegroupid", + u"MusicBrainz Work Id": "musicbrainz_workid", + u"Acoustid Fingerprint": 
"acoustid_fingerprint", + u"Acoustid Id": "acoustid_id", +}.items(): EasyID3.RegisterTXXXKey(key, desc) diff --git a/lib/mutagen/easymp4.py b/lib/mutagen/easymp4.py old mode 100755 new mode 100644 index a026e754..54d9ab5b --- a/lib/mutagen/easymp4.py +++ b/lib/mutagen/easymp4.py @@ -9,7 +9,6 @@ from mutagen import Tags from mutagen._util import DictMixin, dict_match from mutagen.mp4 import MP4, MP4Tags, error, delete -from ._compat import PY2, text_type, PY3 __all__ = ["EasyMP4Tags", "EasyMP4", "delete", "error"] @@ -42,11 +41,14 @@ class EasyMP4Tags(DictMixin, Tags): self.load = self.__mp4.load self.save = self.__mp4.save self.delete = self.__mp4.delete - self._padding = self.__mp4._padding filename = property(lambda s: s.__mp4.filename, lambda s, fn: setattr(s.__mp4, 'filename', fn)) + @property + def _padding(self): + return self.__mp4._padding + @classmethod def RegisterKey(cls, key, getter=None, setter=None, deleter=None, lister=None): @@ -103,7 +105,7 @@ class EasyMP4Tags(DictMixin, Tags): """ def getter(tags, key): - return list(map(text_type, tags[atomid])) + return list(map(str, tags[atomid])) def setter(tags, key, value): clamp = lambda x: int(min(max(min_value, x), max_value)) @@ -121,9 +123,9 @@ class EasyMP4Tags(DictMixin, Tags): ret = [] for (track, total) in tags[atomid]: if total: - ret.append("%d/%d" % (track, total)) + ret.append(u"%d/%d" % (track, total)) else: - ret.append(text_type(track)) + ret.append(str(track)) return ret def setter(tags, key, value): @@ -164,10 +166,8 @@ class EasyMP4Tags(DictMixin, Tags): def setter(tags, key, value): encoded = [] for v in value: - if not isinstance(v, text_type): - if PY3: - raise TypeError("%r not str" % v) - v = v.decode("utf-8") + if not isinstance(v, str): + raise TypeError("%r not str" % v) encoded.append(v.encode("utf-8")) tags[atomid] = encoded @@ -187,12 +187,8 @@ class EasyMP4Tags(DictMixin, Tags): def __setitem__(self, key, value): key = key.lower() - if PY2: - if isinstance(value, str): - value 
= [value] - else: - if isinstance(value, text_type): - value = [value] + if isinstance(value, str): + value = [value] func = dict_match(self.Set, key) if func is not None: @@ -210,7 +206,7 @@ class EasyMP4Tags(DictMixin, Tags): def keys(self): keys = [] - for key in list(self.Get.keys()): + for key in self.Get.keys(): if key in self.List: keys.extend(self.List[key](self.__mp4, key)) elif key in self: @@ -226,7 +222,7 @@ class EasyMP4Tags(DictMixin, Tags): strings.append("%s=%s" % (key, value)) return "\n".join(strings) -for atomid, key in list({ +for atomid, key in { '\xa9nam': 'title', '\xa9alb': 'album', '\xa9ART': 'artist', @@ -242,10 +238,10 @@ for atomid, key in list({ 'soar': 'artistsort', 'sonm': 'titlesort', 'soco': 'composersort', -}.items()): +}.items(): EasyMP4Tags.RegisterTextKey(key, atomid) -for name, key in list({ +for name, key in { 'MusicBrainz Artist Id': 'musicbrainz_artistid', 'MusicBrainz Track Id': 'musicbrainz_trackid', 'MusicBrainz Album Id': 'musicbrainz_albumid', @@ -254,18 +250,18 @@ for name, key in list({ 'MusicBrainz Album Status': 'musicbrainz_albumstatus', 'MusicBrainz Album Type': 'musicbrainz_albumtype', 'MusicBrainz Release Country': 'releasecountry', -}.items()): +}.items(): EasyMP4Tags.RegisterFreeformKey(key, name) -for name, key in list({ +for name, key in { "tmpo": "bpm", -}.items()): +}.items(): EasyMP4Tags.RegisterIntKey(key, name) -for name, key in list({ +for name, key in { "trkn": "tracknumber", "disk": "discnumber", -}.items()): +}.items(): EasyMP4Tags.RegisterIntPairKey(key, name) diff --git a/lib/mutagen/flac.py b/lib/mutagen/flac.py old mode 100755 new mode 100644 index 825e947a..58a12d07 --- a/lib/mutagen/flac.py +++ b/lib/mutagen/flac.py @@ -23,12 +23,12 @@ http://flac.sourceforge.net/format.html __all__ = ["FLAC", "Open", "delete"] import struct +from io import BytesIO from ._vorbis import VCommentDict import mutagen -from ._compat import cBytesIO, endswith, chr_, xrange from mutagen._util import resize_bytes, 
MutagenError, get_size, loadfile, \ - convert_error + convert_error, bchr, endswith from mutagen._tags import PaddingInfo from mutagen.id3._util import BitPaddedInt from functools import reduce @@ -101,7 +101,7 @@ class MetadataBlock(object): if data is not None: if not isinstance(data, StrictFileObject): if isinstance(data, bytes): - data = cBytesIO(data) + data = BytesIO(data) elif not hasattr(data, 'read'): raise TypeError( "StreamInfo requires string data or a file-like") @@ -201,7 +201,7 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): self.channels == other.channels and self.bits_per_sample == other.bits_per_sample and self.total_samples == other.total_samples) - except: + except Exception: return False __hash__ = MetadataBlock.__hash__ @@ -232,7 +232,7 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): self.md5_signature = to_int_be(data.read(16)) def write(self): - f = cBytesIO() + f = BytesIO() f.write(struct.pack(">I", self.min_blocksize)[-2:]) f.write(struct.pack(">I", self.max_blocksize)[-2:]) f.write(struct.pack(">I", self.min_framesize)[-3:]) @@ -244,11 +244,11 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): byte = (self.sample_rate & 0xF) << 4 byte += ((self.channels - 1) & 7) << 1 byte += ((self.bits_per_sample - 1) >> 4) & 1 - f.write(chr_(byte)) + f.write(bchr(byte)) # 4 bits of bps, 4 of sample count byte = ((self.bits_per_sample - 1) & 0xF) << 4 byte += (self.total_samples >> 32) & 0xF - f.write(chr_(byte)) + f.write(bchr(byte)) # last 32 of sample count f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFF)) # MD5 signature @@ -259,7 +259,7 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): return f.getvalue() def pprint(self): - return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) + return u"FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) class SeekPoint(tuple): @@ -284,6 +284,9 @@ class SeekPoint(tuple): return super(cls, SeekPoint).__new__( cls, (first_sample, byte_offset, 
num_samples)) + def __getnewargs__(self): + return self.first_sample, self.byte_offset, self.num_samples + first_sample = property(lambda self: self[0]) byte_offset = property(lambda self: self[1]) num_samples = property(lambda self: self[2]) @@ -322,7 +325,7 @@ class SeekTable(MetadataBlock): sp = data.tryread(self.__SEEKPOINT_SIZE) def write(self): - f = cBytesIO() + f = BytesIO() for seekpoint in self.seekpoints: packed = struct.pack( self.__SEEKPOINT_FORMAT, @@ -391,10 +394,10 @@ class CueSheetTrack(object): Attributes: track_number (`int`): track number start_offset (`int`): track offset in samples from start of FLAC stream - isrc (`text`): ISRC code, exactly 12 characters + isrc (`mutagen.text`): ISRC code, exactly 12 characters type (`int`): 0 for audio, 1 for digital data pre_emphasis (`bool`): true if the track is recorded with pre-emphasis - indexes (List[`mutagen.flac.CueSheetTrackIndex`]): + indexes (list[CueSheetTrackIndex]): list of CueSheetTrackIndex objects """ @@ -437,14 +440,14 @@ class CueSheet(MetadataBlock): in the cue sheet. 
Attributes: - media_catalog_number (`text`): media catalog number in ASCII, + media_catalog_number (`mutagen.text`): media catalog number in ASCII, up to 128 characters lead_in_samples (`int`): number of lead-in samples compact_disc (`bool`): true if the cuesheet corresponds to a compact disc - tracks (List[`mutagen.flac.CueSheetTrack`]): + tracks (list[CueSheetTrack]): list of CueSheetTrack objects - lead_out (`mutagen.flac.CueSheetTrack` or `None`): + lead_out (`CueSheetTrack` or `None`): lead-out as CueSheetTrack or None if lead-out was not found """ @@ -502,7 +505,7 @@ class CueSheet(MetadataBlock): self.tracks.append(val) def write(self): - f = cBytesIO() + f = BytesIO() flags = 0 if self.compact_disc: flags |= 0x80 @@ -574,8 +577,8 @@ class Picture(MetadataBlock): def __init__(self, data=None): self.type = 0 - self.mime = '' - self.desc = '' + self.mime = u'' + self.desc = u'' self.width = 0 self.height = 0 self.depth = 0 @@ -608,7 +611,7 @@ class Picture(MetadataBlock): self.data = data.read(length) def write(self): - f = cBytesIO() + f = BytesIO() mime = self.mime.encode('UTF-8') f.write(struct.pack('>2I', self.type, len(mime))) f.write(mime) @@ -678,7 +681,7 @@ class FLAC(mutagen.FileType): Attributes: cuesheet (`CueSheet`): if any or `None` seektable (`SeekTable`): if any or `None` - pictures (List[`Picture`]): list of embedded pictures + pictures (list[Picture]): list of embedded pictures info (`StreamInfo`) tags (`mutagen._vorbis.VCommentDict`) """ @@ -732,7 +735,9 @@ class FLAC(mutagen.FileType): if self.tags is None: self.tags = block else: - raise FLACVorbisError("> 1 Vorbis comment block found") + # https://github.com/quodlibet/mutagen/issues/377 + # Something writes multiple and metaflac doesn't care + pass elif block.code == CueSheet.code: if self.cuesheet is None: self.cuesheet = block @@ -756,19 +761,21 @@ class FLAC(mutagen.FileType): add_vorbiscomment = add_tags + @convert_error(IOError, error) @loadfile(writable=True) - def delete(self, 
filething): + def delete(self, filething=None): """Remove Vorbis comments from a file. If no filename is given, the one most recently loaded is used. """ if self.tags is not None: - self.metadata_blocks.remove(self.tags) - try: - self.save(filething, padding=lambda x: 0) - finally: - self.metadata_blocks.append(self.tags) + temp_blocks = [ + b for b in self.metadata_blocks if b.code != VCFLACDict.code] + self._save(filething, temp_blocks, False, padding=lambda x: 0) + self.metadata_blocks[:] = [ + b for b in self.metadata_blocks + if b.code != VCFLACDict.code or b is self.tags] self.tags.clear() vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.") @@ -823,26 +830,24 @@ class FLAC(mutagen.FileType): @property def pictures(self): - """ - Returns: - List[`Picture`]: List of embedded pictures - """ - return [b for b in self.metadata_blocks if b.code == Picture.code] @convert_error(IOError, error) @loadfile(writable=True) - def save(self, filething, deleteid3=False, padding=None): + def save(self, filething=None, deleteid3=False, padding=None): """Save metadata blocks to a file. Args: filething (filething) deleteid3 (bool): delete id3 tags while at it - padding (PaddingFunction) + padding (:obj:`mutagen.PaddingFunction`) If no filename is given, the one most recently loaded is used. 
""" + self._save(filething, self.metadata_blocks, deleteid3, padding) + + def _save(self, filething, metadata_blocks, deleteid3, padding): f = StrictFileObject(filething.fileobj) header = self.__check_header(f, filething.name) audio_offset = self.__find_audio_offset(f) @@ -857,7 +862,7 @@ class FLAC(mutagen.FileType): content_size = get_size(f) - audio_offset assert content_size >= 0 data = MetadataBlock._writeblocks( - self.metadata_blocks, available, content_size, padding) + metadata_blocks, available, content_size, padding) data_size = len(data) resize_bytes(filething.fileobj, available, data_size, header) diff --git a/lib/mutagen/id3/__init__.py b/lib/mutagen/id3/__init__.py old mode 100755 new mode 100644 index 325626f1..adebd7a3 --- a/lib/mutagen/id3/__init__.py +++ b/lib/mutagen/id3/__init__.py @@ -38,22 +38,30 @@ from ._frames import Frames, Frames_2_2, Frame, TextFrame, UrlFrame, \ from ._util import ID3NoHeaderError, error, ID3UnsupportedVersionError from ._id3v1 import ParseID3v1, MakeID3v1 from ._tags import ID3Tags +from ._frames import (AENC, APIC, ASPI, BUF, CHAP, CNT, COM, COMM, COMR, CRA, + CRM, CTOC, ENCR, EQU2, ETC, ETCO, GEO, GEOB, GP1, GRID, GRP1, IPL, IPLS, + LINK, LNK, MCDI, MCI, MLL, MLLT, MVI, MVIN, MVN, MVNM, OWNE, PCNT, PCST, + PIC, POP, POPM, POSS, PRIV, RBUF, REV, RVA, RVA2, RVAD, RVRB, SEEK, SIGN, + SLT, STC, SYLT, SYTC, TAL, TALB, TBP, TBPM, TCAT, TCM, TCMP, TCO, TCOM, + TCON, TCOP, TCP, TCR, TDA, TDAT, TDEN, TDES, TDLY, TDOR, TDRC, TDRL, TDTG, + TDY, TEN, TENC, TEXT, TFLT, TFT, TGID, TIM, TIME, TIPL, TIT1, TIT2, TIT3, + TKE, TKEY, TKWD, TLA, TLAN, TLE, TLEN, TMCL, TMED, TMOO, TMT, TOA, TOAL, + TOF, TOFN, TOL, TOLY, TOPE, TOR, TORY, TOT, TOWN, TP1, TP2, TP3, TP4, TPA, + TPB, TPE1, TPE2, TPE3, TPE4, TPOS, TPRO, TPUB, TRC, TRCK, TRD, TRDA, TRK, + TRSN, TRSO, TS2, TSA, TSC, TSI, TSIZ, TSO2, TSOA, TSOC, TSOP, TSOT, TSP, + TSRC, TSS, TSSE, TSST, TST, TT1, TT2, TT3, TXT, TXX, TXXX, TYE, TYER, UFI, + UFID, ULT, USER, USLT, WAF, WAR, WAS, 
WCM, WCOM, WCOP, WCP, WFED, WOAF, + WOAR, WOAS, WORS, WPAY, WPB, WPUB, WXX, WXXX) # deprecated from ._util import ID3EncryptionUnsupportedError, ID3JunkFrameError, \ ID3BadUnsynchData, ID3BadCompressedData, ID3TagError, ID3Warning, \ BitPaddedInt as _BitPaddedIntForPicard - -for f in Frames: - globals()[f] = Frames[f] -for f in Frames_2_2: - globals()[f] = Frames_2_2[f] - # support open(filename) as interface Open = ID3 -# pyflakes +# flake8 ID3, ID3FileType, delete, ID3v1SaveOptions, Encoding, PictureType, CTOCFlags, ID3TimeStamp, Frames, Frames_2_2, Frame, TextFrame, UrlFrame, UrlFrameU, TimeStampTextFrame, BinaryFrame, NumericPartTextFrame, NumericTextFrame, @@ -62,6 +70,20 @@ ParseID3v1, MakeID3v1, ID3Tags, ID3EncryptionUnsupportedError, ID3JunkFrameError, ID3BadUnsynchData, ID3BadCompressedData, ID3TagError, ID3Warning +AENC, APIC, ASPI, BUF, CHAP, CNT, COM, COMM, COMR, CRA, CRM, CTOC, ENCR, EQU2, +ETC, ETCO, GEO, GEOB, GP1, GRID, GRP1, IPL, IPLS, LINK, LNK, MCDI, MCI, MLL, +MLLT, MVI, MVIN, MVN, MVNM, OWNE, PCNT, PCST, PIC, POP, POPM, POSS, PRIV, +RBUF, REV, RVA, RVA2, RVAD, RVRB, SEEK, SIGN, SLT, STC, SYLT, SYTC, TAL, TALB, +TBP, TBPM, TCAT, TCM, TCMP, TCO, TCOM, TCON, TCOP, TCP, TCR, TDA, TDAT, TDEN, +TDES, TDLY, TDOR, TDRC, TDRL, TDTG, TDY, TEN, TENC, TEXT, TFLT, TFT, TGID, +TIM, TIME, TIPL, TIT1, TIT2, TIT3, TKE, TKEY, TKWD, TLA, TLAN, TLE, TLEN, +TMCL, TMED, TMOO, TMT, TOA, TOAL, TOF, TOFN, TOL, TOLY, TOPE, TOR, TORY, TOT, +TOWN, TP1, TP2, TP3, TP4, TPA, TPB, TPE1, TPE2, TPE3, TPE4, TPOS, TPRO, TPUB, +TRC, TRCK, TRD, TRDA, TRK, TRSN, TRSO, TS2, TSA, TSC, TSI, TSIZ, TSO2, TSOA, +TSOC, TSOP, TSOT, TSP, TSRC, TSS, TSSE, TSST, TST, TT1, TT2, TT3, TXT, TXX, +TXXX, TYE, TYER, UFI, UFID, ULT, USER, USLT, WAF, WAR, WAS, WCM, WCOM, WCOP, +WCP, WFED, WOAF, WOAR, WOAS, WORS, WPAY, WPB, WPUB, WXX, WXXX + # Workaround for http://tickets.musicbrainz.org/browse/PICARD-833 class _DummySpecForPicard(object): diff --git a/lib/mutagen/id3/_file.py 
b/lib/mutagen/id3/_file.py old mode 100755 new mode 100644 index 3d19389a..0ec11bd4 --- a/lib/mutagen/id3/_file.py +++ b/lib/mutagen/id3/_file.py @@ -53,8 +53,8 @@ class ID3(ID3Tags, mutagen.Metadata): filething (filething): or `None` Attributes: - version (Tuple[int]): ID3 tag version as a tuple - unknown_frames (List[bytes]): raw frame data of any unknown frames + version (tuple[int]): ID3 tag version as a tuple + unknown_frames (list[bytes]): raw frame data of any unknown frames found size (int): the total size of the ID3 tag, including the header """ @@ -78,8 +78,6 @@ class ID3(ID3Tags, mutagen.Metadata): @property def version(self): - """`tuple`: ID3 tag version as a tuple (of the loaded file)""" - if self._header is not None: return self._header.version return self._version @@ -112,10 +110,9 @@ class ID3(ID3Tags, mutagen.Metadata): @convert_error(IOError, error) @loadfile() - def load(self, filething, known_frames=None, translate=True, v2_version=4): - """load(filething, known_frames=None, translate=True, v2_version=4) - - Load tags from a filename. + def load(self, filething, known_frames=None, translate=True, v2_version=4, + load_v1=True): + """Load tags from a filename. Args: filename (filething): filename or file object to load tag data from @@ -126,6 +123,11 @@ class ID3(ID3Tags, mutagen.Metadata): call update_to_v23() / update_to_v24() manually. v2_version (int): if update_to_v23 or update_to_v24 get called (3 or 4) + load_v1 (bool): Load tags from ID3v1 header if present. If both + ID3v1 and ID3v2 headers are present, combine the tags from + the two, with ID3v2 having precedence. + + .. 
versionadded:: 1.42 Example of loading a custom frame:: @@ -149,13 +151,17 @@ class ID3(ID3Tags, mutagen.Metadata): try: self._header = ID3Header(fileobj) except (ID3NoHeaderError, ID3UnsupportedVersionError): - frames, offset = find_id3v1(fileobj) + if not load_v1: + raise + + frames, offset = find_id3v1(fileobj, v2_version, known_frames) if frames is None: raise self.version = ID3Header._V11 - for v in list(frames.values()): - self.add(v) + for v in frames.values(): + if len(self.getall(v.HashKey)) == 0: + self.add(v) else: # XXX: attach to the header object so we have it in spec parsing.. if known_frames is not None: @@ -165,6 +171,14 @@ class ID3(ID3Tags, mutagen.Metadata): remaining_data = self._read(self._header, data) self._padding = len(remaining_data) + if load_v1: + v1v2_ver = 4 if self.version[1] == 4 else 3 + frames, offset = find_id3v1(fileobj, v1v2_ver, known_frames) + if frames: + for v in frames.values(): + if len(self.getall(v.HashKey)) == 0: + self.add(v) + if translate: if v2_version == 3: self.update_to_v23() @@ -204,13 +218,14 @@ class ID3(ID3Tags, mutagen.Metadata): @convert_error(IOError, error) @loadfile(writable=True, create=True) - def save(self, filething, v1=1, v2_version=4, v23_sep='/', padding=None): + def save(self, filething=None, v1=1, v2_version=4, v23_sep='/', + padding=None): """save(filething=None, v1=1, v2_version=4, v23_sep='/', padding=None) Save changes to a file. Args: - filename (fspath): + filething (filething): Filename to save the tag to. If no filename is given, the one most recently loaded is used. v1 (ID3v1SaveOptions): @@ -223,7 +238,7 @@ class ID3(ID3Tags, mutagen.Metadata): the separator used to join multiple text values if v2_version == 3. Defaults to '/' but if it's None will be the ID3v2v2.4 null separator. 
- padding (PaddingFunction) + padding (:obj:`mutagen.PaddingFunction`) Raises: mutagen.MutagenError @@ -268,7 +283,7 @@ class ID3(ID3Tags, mutagen.Metadata): f.truncate() @loadfile(writable=True) - def delete(self, filething, delete_v1=True, delete_v2=True): + def delete(self, filething=None, delete_v1=True, delete_v2=True): """delete(filething=None, delete_v1=True, delete_v2=True) Remove tags from a file. @@ -352,7 +367,7 @@ class ID3FileType(mutagen.FileType): @staticmethod def pprint(): - return "Unknown format with ID3 tag" + return u"Unknown format with ID3 tag" @staticmethod def score(filename, fileobj, header_data): diff --git a/lib/mutagen/id3/_frames.py b/lib/mutagen/id3/_frames.py old mode 100755 new mode 100644 index 593276f6..7b778b1c --- a/lib/mutagen/id3/_frames.py +++ b/lib/mutagen/id3/_frames.py @@ -18,8 +18,6 @@ from ._specs import BinaryDataSpec, StringSpec, Latin1TextSpec, \ KeyEventSpec, TimeStampSpec, EncodedNumericPartTextSpec, \ EncodedNumericTextSpec, SpecError, PictureTypeSpec, ID3FramesSpec, \ Latin1TextListSpec, CTOCFlagsSpec, FrameIDSpec, RVASpec -from .._compat import text_type, string_types, swap_to_string, iteritems, \ - izip, itervalues def _bytes2key(b): @@ -265,7 +263,7 @@ class Frame(object): if tflags & Frame.FLAG24_COMPRESS: try: data = zlib.decompress(data) - except zlib.error as err: + except zlib.error: # the initial mutagen that went out with QL 0.12 did not # write the 4 bytes of uncompressed size. Compensate. 
data = datalen_bytes + data @@ -277,6 +275,8 @@ class Frame(object): elif header.version >= header._V23: if tflags & Frame.FLAG23_COMPRESS: + if len(data) < 4: + raise ID3JunkFrameError('frame too small: %r' % data) usize, = unpack('>L', data[:4]) data = data[4:] if tflags & Frame.FLAG23_ENCRYPT: @@ -329,11 +329,11 @@ class CHAP(Frame): __hash__ = Frame.__hash__ def _pprint(self): - frame_pprint = "" - for frame in itervalues(self.sub_frames): + frame_pprint = u"" + for frame in self.sub_frames.values(): for line in frame.pprint().splitlines(): frame_pprint += "\n" + " " * 4 + line - return "%s time=%d..%d offset=%d..%d%s" % ( + return u"%s time=%d..%d offset=%d..%d%s" % ( self.element_id, self.start_time, self.end_time, self.start_offset, self.end_offset, frame_pprint) @@ -368,16 +368,15 @@ class CTOC(Frame): self.child_element_ids == other.child_element_ids def _pprint(self): - frame_pprint = "" + frame_pprint = u"" if getattr(self, "sub_frames", None): frame_pprint += "\n" + "\n".join( - [" " * 4 + f.pprint() for f in list(self.sub_frames.values())]) - return "%s flags=%d child_element_ids=%s%s" % ( + [" " * 4 + f.pprint() for f in self.sub_frames.values()]) + return u"%s flags=%d child_element_ids=%s%s" % ( self.element_id, int(self.flags), - ",".join(self.child_element_ids), frame_pprint) + u",".join(self.child_element_ids), frame_pprint) -@swap_to_string class TextFrame(Frame): """Text strings. 
@@ -395,20 +394,20 @@ class TextFrame(Frame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', EncodedTextSpec('text'), sep='\u0000', default=[]), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] def __bytes__(self): - return text_type(self).encode('utf-8') + return str(self).encode('utf-8') def __str__(self): - return '\u0000'.join(self.text) + return u'\u0000'.join(self.text) def __eq__(self, other): if isinstance(other, bytes): return bytes(self) == other - elif isinstance(other, text_type): - return text_type(self) == other + elif isinstance(other, str): + return str(self) == other return self.text == other __hash__ = Frame.__hash__ @@ -434,6 +433,7 @@ class TextFrame(Frame): for val in other[:]: if val not in self: self.append(val) + self.encoding = max(self.encoding, other.encoding) return self def _pprint(self): @@ -451,7 +451,7 @@ class NumericTextFrame(TextFrame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', EncodedNumericTextSpec('text'), sep='\u0000', + MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000', default=[]), ] @@ -472,7 +472,7 @@ class NumericPartTextFrame(TextFrame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', EncodedNumericPartTextSpec('text'), sep='\u0000', + MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000', default=[]), ] @@ -480,7 +480,6 @@ class NumericPartTextFrame(TextFrame): return int(self.text[0].split("/")[0]) -@swap_to_string class TimeStampTextFrame(TextFrame): """A list of time stamps. 
@@ -490,20 +489,19 @@ class TimeStampTextFrame(TextFrame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', TimeStampSpec('stamp'), sep=',', default=[]), + MultiSpec('text', TimeStampSpec('stamp'), sep=u',', default=[]), ] def __bytes__(self): - return text_type(self).encode('utf-8') + return str(self).encode('utf-8') def __str__(self): - return ','.join([stamp.text for stamp in self.text]) + return u','.join([stamp.text for stamp in self.text]) def _pprint(self): - return " / ".join([stamp.text for stamp in self.text]) + return u" / ".join([stamp.text for stamp in self.text]) -@swap_to_string class UrlFrame(Frame): """A frame containing a URL string. @@ -574,11 +572,11 @@ class TCON(TextFrame): try: genres.append(self.GENRES[int(value)]) except IndexError: - genres.append("Unknown") + genres.append(u"Unknown") elif value == "CR": - genres.append("Cover") + genres.append(u"Cover") elif value == "RX": - genres.append("Remix") + genres.append(u"Remix") elif value: newgenres = [] genreid, dummy, genrename = genre_re.match(value).groups() @@ -586,14 +584,14 @@ class TCON(TextFrame): if genreid: for gid in genreid[1:-1].split(")("): if gid.isdigit() and int(gid) < len(self.GENRES): - gid = text_type(self.GENRES[int(gid)]) + gid = str(self.GENRES[int(gid)]) newgenres.append(gid) elif gid == "CR": - newgenres.append("Cover") + newgenres.append(u"Cover") elif gid == "RX": - newgenres.append("Remix") + newgenres.append(u"Remix") else: - newgenres.append("Unknown") + newgenres.append(u"Unknown") if genrename: # "Unescaping" the first parenthesis @@ -607,7 +605,7 @@ class TCON(TextFrame): return genres def __set_genres(self, genres): - if isinstance(genres, string_types): + if isinstance(genres, str): genres = [genres] self.text = [self.__decode(g) for g in genres] @@ -669,6 +667,14 @@ class MVI(MVIN): "iTunes Movement Number/Count" +class GRP1(TextFrame): + "iTunes Grouping" + + +class GP1(GRP1): + "iTunes Grouping" + + class 
TDOR(TimeStampTextFrame): "Original Release Time" @@ -860,7 +866,7 @@ class TXXX(TextFrame): _framespec = [ EncodingSpec('encoding'), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), sep='\u0000', default=[]), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), ] @property @@ -1035,7 +1041,6 @@ class SYTC(Frame): __hash__ = Frame.__hash__ -@swap_to_string class USLT(Frame): """Unsynchronised lyrics/text transcription. @@ -1045,7 +1050,7 @@ class USLT(Frame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - StringSpec('lang', length=3, default="XXX"), + StringSpec('lang', length=3, default=u"XXX"), EncodedTextSpec('desc'), EncodedTextSpec('text'), ] @@ -1065,14 +1070,16 @@ class USLT(Frame): __hash__ = Frame.__hash__ + def _pprint(self): + return "%s=%s=%s" % (self.desc, self.lang, self.text) + -@swap_to_string class SYLT(Frame): """Synchronised lyrics/text.""" _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', length=3, default="XXX"), + StringSpec('lang', length=3, default=u"XXX"), ByteSpec('format', default=1), ByteSpec('type', default=0), EncodedTextSpec('desc'), @@ -1083,16 +1090,21 @@ class SYLT(Frame): def HashKey(self): return '%s:%s:%s' % (self.FrameID, self.desc, self.lang) + def _pprint(self): + return str(self) + def __eq__(self, other): return str(self) == other __hash__ = Frame.__hash__ def __str__(self): - return "".join(text for (text, time) in self.text) + unit = 'fr' if self.format == 1 else 'ms' + return u"\n".join("[{0}{1}]: {2}".format(time, unit, text) + for (text, time) in self.text) def __bytes__(self): - return text_type(self).encode("utf-8") + return str(self).encode("utf-8") class COMM(TextFrame): @@ -1106,7 +1118,7 @@ class COMM(TextFrame): EncodingSpec('encoding'), StringSpec('lang', length=3, default="XXX"), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), sep='\u0000', default=[]), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', 
default=[]), ] @property @@ -1263,11 +1275,11 @@ class APIC(Frame): return '%s:%s' % (self.FrameID, self.desc) def _merge_frame(self, other): - other.desc += " " + other.desc += u" " return other def _pprint(self): - type_desc = text_type(self.type) + type_desc = str(self.type) if hasattr(self.type, "_pprint"): type_desc = self.type._pprint() @@ -1297,7 +1309,7 @@ class PCNT(Frame): return self.count def _pprint(self): - return text_type(self.count) + return str(self.count) class PCST(Frame): @@ -1316,7 +1328,7 @@ class PCST(Frame): return self.value def _pprint(self): - return text_type(self.value) + return str(self.value) class POPM(Frame): @@ -1420,7 +1432,6 @@ class RBUF(Frame): return self.size -@swap_to_string class AENC(Frame): """Audio encryption. @@ -1537,7 +1548,6 @@ class UFID(Frame): return "%s=%r" % (self.owner, self.data) -@swap_to_string class USER(Frame): """Terms of use. @@ -1550,7 +1560,7 @@ class USER(Frame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', length=3, default="XXX"), + StringSpec('lang', length=3, default=u"XXX"), EncodedTextSpec('text'), ] @@ -1573,14 +1583,13 @@ class USER(Frame): return "%r=%s" % (self.lang, self.text) -@swap_to_string class OWNE(Frame): """Ownership frame.""" _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('date', length=8, default="19700101"), + StringSpec('date', length=8, default=u"19700101"), EncodedTextSpec('seller'), ] @@ -1602,7 +1611,7 @@ class COMR(Frame): _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('valid_until', length=8, default="19700101"), + StringSpec('valid_until', length=8, default=u"19700101"), Latin1TextSpec('contact'), ByteSpec('format', default=0), EncodedTextSpec('seller'), @@ -1624,7 +1633,6 @@ class COMR(Frame): __hash__ = Frame.__hash__ -@swap_to_string class ENCR(Frame): """Encryption method registration. 
@@ -1651,7 +1659,6 @@ class ENCR(Frame): __hash__ = Frame.__hash__ -@swap_to_string class GRID(Frame): """Group identification registration.""" @@ -1680,7 +1687,6 @@ class GRID(Frame): __hash__ = Frame.__hash__ -@swap_to_string class PRIV(Frame): """Private frame.""" @@ -1706,7 +1712,6 @@ class PRIV(Frame): __hash__ = Frame.__hash__ -@swap_to_string class SIGN(Frame): """Signature frame.""" @@ -2118,7 +2123,7 @@ Frames_2_2 = {} k, v = None, None -for k, v in iteritems(globals()): +for k, v in globals().items(): if isinstance(v, type) and issubclass(v, Frame): v.__module__ = "mutagen.id3" diff --git a/lib/mutagen/id3/_id3v1.py b/lib/mutagen/id3/_id3v1.py old mode 100755 new mode 100644 index df9303be..13138e75 --- a/lib/mutagen/id3/_id3v1.py +++ b/lib/mutagen/id3/_id3v1.py @@ -11,22 +11,32 @@ import errno from struct import error as StructError, unpack -from mutagen._util import chr_, text_type +from mutagen._util import bchr -from ._frames import TCON, TRCK, COMM, TDRC, TALB, TPE1, TIT2 +from ._frames import TCON, TRCK, COMM, TDRC, TYER, TALB, TPE1, TIT2 -def find_id3v1(fileobj): +def find_id3v1(fileobj, v2_version=4, known_frames=None): """Returns a tuple of (id3tag, offset_to_end) or (None, 0) offset mainly because we used to write too short tags in some cases and we need the offset to delete them. + + v2_version: Decides whether ID3v2.3 or ID3v2.4 tags + should be returned. Must be 3 or 4. 
+ + known_frames (Dict[`mutagen.text`, `Frame`]): dict mapping frame + IDs to Frame objects """ + if v2_version not in (3, 4): + raise ValueError("Only 3 and 4 possible for v2_version") + # id3v1 is always at the end (after apev2) extra_read = b"APETAGEX".index(b"TAG") + old_pos = fileobj.tell() try: fileobj.seek(-128 - extra_read, 2) except IOError as e: @@ -38,6 +48,7 @@ def find_id3v1(fileobj): raise data = fileobj.read(128 + extra_read) + fileobj.seek(old_pos, 0) try: idx = data.index(b"TAG") except ValueError: @@ -53,7 +64,7 @@ def find_id3v1(fileobj): if idx == ape_idx + extra_read: return (None, 0) - tag = ParseID3v1(data[idx:]) + tag = ParseID3v1(data[idx:], v2_version, known_frames) if tag is None: return (None, 0) @@ -62,12 +73,21 @@ def find_id3v1(fileobj): # ID3v1.1 support. -def ParseID3v1(data): - """Parse an ID3v1 tag, returning a list of ID3v2.4 frames. +def ParseID3v1(data, v2_version=4, known_frames=None): + """Parse an ID3v1 tag, returning a list of ID3v2 frames Returns a {frame_name: frame} dict or None. + + v2_version: Decides whether ID3v2.3 or ID3v2.4 tags + should be returned. Must be 3 or 4. 
+ + known_frames (Dict[`mutagen.text`, `Frame`]): dict mapping frame + IDs to Frame objects """ + if v2_version not in (3, 4): + raise ValueError("Only 3 and 4 possible for v2_version") + try: data = data[data.index(b"TAG"):] except ValueError: @@ -94,26 +114,48 @@ def ParseID3v1(data): def fix(data): return data.split(b"\x00")[0].strip().decode('latin1') - title, artist, album, year, comment = list(map( - fix, [title, artist, album, year, comment])) + title, artist, album, year, comment = map( + fix, [title, artist, album, year, comment]) + + frame_class = { + "TIT2": TIT2, + "TPE1": TPE1, + "TALB": TALB, + "TYER": TYER, + "TDRC": TDRC, + "COMM": COMM, + "TRCK": TRCK, + "TCON": TCON, + } + for key in frame_class: + if known_frames is not None: + if key in known_frames: + frame_class[key] = known_frames[key] + else: + frame_class[key] = None frames = {} - if title: - frames["TIT2"] = TIT2(encoding=0, text=title) - if artist: - frames["TPE1"] = TPE1(encoding=0, text=[artist]) - if album: - frames["TALB"] = TALB(encoding=0, text=album) + if title and frame_class["TIT2"]: + frames["TIT2"] = frame_class["TIT2"](encoding=0, text=title) + if artist and frame_class["TPE1"]: + frames["TPE1"] = frame_class["TPE1"](encoding=0, text=[artist]) + if album and frame_class["TALB"]: + frames["TALB"] = frame_class["TALB"](encoding=0, text=album) if year: - frames["TDRC"] = TDRC(encoding=0, text=year) - if comment: - frames["COMM"] = COMM( + if v2_version == 3 and frame_class["TYER"]: + frames["TYER"] = frame_class["TYER"](encoding=0, text=year) + elif frame_class["TDRC"]: + frames["TDRC"] = frame_class["TDRC"](encoding=0, text=year) + if comment and frame_class["COMM"]: + frames["COMM"] = frame_class["COMM"]( encoding=0, lang="eng", desc="ID3v1 Comment", text=comment) + # Don't read a track number if it looks like the comment was # padded with spaces instead of nulls (thanks, WinAmp). 
- if track and ((track != 32) or (data[-3] == b'\x00'[0])): + if (track and frame_class["TRCK"] and + ((track != 32) or (data[-3] == b'\x00'[0]))): frames["TRCK"] = TRCK(encoding=0, text=str(track)) - if genre != 255: + if genre != 255 and frame_class["TCON"]: frames["TCON"] = TCON(encoding=0, text=str(genre)) return frames @@ -123,8 +165,8 @@ def MakeID3v1(id3): v1 = {} - for v2id, name in list({"TIT2": "title", "TPE1": "artist", - "TALB": "album"}.items()): + for v2id, name in {"TIT2": "title", "TPE1": "artist", + "TALB": "album"}.items(): if v2id in id3: text = id3[v2id].text[0].encode('latin1', 'replace')[:30] else: @@ -139,7 +181,7 @@ def MakeID3v1(id3): if "TRCK" in id3: try: - v1["track"] = chr_(+id3["TRCK"]) + v1["track"] = bchr(+id3["TRCK"]) except ValueError: v1["track"] = b"\x00" else: @@ -152,14 +194,14 @@ def MakeID3v1(id3): pass else: if genre in TCON.GENRES: - v1["genre"] = chr_(TCON.GENRES.index(genre)) + v1["genre"] = bchr(TCON.GENRES.index(genre)) if "genre" not in v1: v1["genre"] = b"\xff" if "TDRC" in id3: - year = text_type(id3["TDRC"]).encode('ascii') + year = str(id3["TDRC"]).encode('ascii') elif "TYER" in id3: - year = text_type(id3["TYER"]).encode('ascii') + year = str(id3["TYER"]).encode('ascii') else: year = b"" v1["year"] = (year + b"\x00\x00\x00\x00")[:4] diff --git a/lib/mutagen/id3/_specs.py b/lib/mutagen/id3/_specs.py old mode 100755 new mode 100644 index 48ed6369..99d42206 --- a/lib/mutagen/id3/_specs.py +++ b/lib/mutagen/id3/_specs.py @@ -10,10 +10,8 @@ import struct import codecs from struct import unpack, pack -from .._compat import text_type, chr_, PY3, swap_to_string, string_types, \ - xrange -from .._util import total_ordering, decode_terminated, enum, izip, flags, \ - cdata, encode_endian +from .._util import total_ordering, decode_terminated, enum, flags, \ + cdata, encode_endian, intround, bchr from ._util import BitPaddedInt, is_valid_frame_id @@ -87,7 +85,7 @@ class PictureType(object): """Publisher/Studio logotype""" def 
_pprint(self): - return text_type(self).split(".", 1)[-1].lower().replace("_", " ") + return str(self).split(".", 1)[-1].lower().replace("_", " ") @flags @@ -165,11 +163,11 @@ class ByteSpec(Spec): return bytearray(data)[0], data[1:] def write(self, config, frame, value): - return chr_(value) + return bchr(value) def validate(self, frame, value): if value is not None: - chr_(value) + bchr(value) return value @@ -278,7 +276,7 @@ class StringSpec(Spec): def __init__(self, name, length, default=None): if default is None: - default = " " * length + default = u" " * length super(StringSpec, self).__init__(name, default) self.len = length @@ -289,26 +287,22 @@ class StringSpec(Spec): except UnicodeDecodeError: raise SpecError("not ascii") else: - if PY3: - chunk = ascii + chunk = ascii return chunk, data[s.len:] def write(self, config, frame, value): - if PY3: - value = value.encode("ascii") + + value = value.encode("ascii") return (bytes(value) + b'\x00' * self.len)[:self.len] def validate(self, frame, value): if value is None: raise TypeError - if PY3: - if not isinstance(value, str): - raise TypeError("%s has to be str" % self.name) - value.encode("ascii") - else: - if not isinstance(value, bytes): - value = value.encode("ascii") + + if not isinstance(value, str): + raise TypeError("%s has to be str" % self.name) + value.encode("ascii") if len(value) == self.len: return value @@ -402,7 +396,7 @@ class RVASpec(Spec): class FrameIDSpec(StringSpec): def __init__(self, name, length): - super(FrameIDSpec, self).__init__(name, length, "X" * length) + super(FrameIDSpec, self).__init__(name, length, u"X" * length) def validate(self, frame, value): value = super(FrameIDSpec, self).validate(frame, value) @@ -424,7 +418,7 @@ class BinaryDataSpec(Spec): def write(self, config, frame, value): if isinstance(value, bytes): return value - value = text_type(value).encode("ascii") + value = str(value).encode("ascii") return value def validate(self, frame, value): @@ -432,10 +426,10 @@ 
class BinaryDataSpec(Spec): raise TypeError if isinstance(value, bytes): return value - elif PY3: + else: raise TypeError("%s has to be bytes" % self.name) - value = text_type(value).encode("ascii") + value = str(value).encode("ascii") return value @@ -464,7 +458,7 @@ class EncodedTextSpec(Spec): Encoding.UTF8: ('utf8', b'\x00'), } - def __init__(self, name, default=""): + def __init__(self, name, default=u""): super(EncodedTextSpec, self).__init__(name, default) def read(self, header, frame, data): @@ -493,7 +487,7 @@ class EncodedTextSpec(Spec): raise SpecError(e) def validate(self, frame, value): - return text_type(value) + return str(value) class MultiSpec(Spec): @@ -527,7 +521,7 @@ class MultiSpec(Spec): return b''.join(data) def validate(self, frame, value): - if self.sep and isinstance(value, string_types): + if self.sep and isinstance(value, str): value = value.split(self.sep) if isinstance(value, list): if len(self.specs) == 1: @@ -568,7 +562,7 @@ class EncodedNumericPartTextSpec(EncodedTextSpec): class Latin1TextSpec(Spec): - def __init__(self, name, default=""): + def __init__(self, name, default=u""): super(Latin1TextSpec, self).__init__(name, default) def read(self, header, frame, data): @@ -582,7 +576,7 @@ class Latin1TextSpec(Spec): return value.encode('latin1') + b'\x00' def validate(self, frame, value): - return text_type(value) + return str(value) class ID3FramesSpec(Spec): @@ -602,7 +596,7 @@ class ID3FramesSpec(Spec): from ._tags import ID3Tags v = ID3Tags() - for frame in list(value.values()): + for frame in value.values(): v.add(frame._get_v23_frame(**kwargs)) return v @@ -647,7 +641,6 @@ class Latin1TextListSpec(Spec): return [self._lspec.validate(frame, v) for v in value] -@swap_to_string @total_ordering class ID3TimeStamp(object): """A time stamp in ID3v2 format. 
@@ -665,10 +658,8 @@ class ID3TimeStamp(object): def __init__(self, text): if isinstance(text, ID3TimeStamp): text = text.text - elif not isinstance(text, text_type): - if PY3: - raise TypeError("not a str") - text = text.decode("utf-8") + elif not isinstance(text, str): + raise TypeError("not a str") self.text = text @@ -683,9 +674,9 @@ class ID3TimeStamp(object): if part is None: break pieces.append(self.__formats[i] % part + self.__seps[i]) - return ''.join(pieces)[:-1] + return u''.join(pieces)[:-1] - def set_text(self, text, splitre=re.compile('[-T:/.]|\s+')): + def set_text(self, text, splitre=re.compile('[-T:/.]|\\s+')): year, month, day, hour, minute, second = \ splitre.split(text + ':::::')[:6] for a in 'year month day hour minute second'.split(): @@ -745,7 +736,7 @@ class VolumeAdjustmentSpec(Spec): return value / 512.0, data[2:] def write(self, config, frame, value): - number = int(round(value * 512)) + number = intround(value * 512) # pack only fails in 2.7, do it manually in 2.6 if not -32768 <= number <= 32767: raise SpecError("not in range") @@ -778,7 +769,7 @@ class VolumePeakSpec(Spec): return (float(peak) / (2 ** 31 - 1)), data[1 + vol_bytes:] def write(self, config, frame, value): - number = int(round(value * 32768)) + number = intround(value * 32768) # pack only fails in 2.7, do it manually in 2.6 if not 0 <= number <= 65535: raise SpecError("not in range") diff --git a/lib/mutagen/id3/_tags.py b/lib/mutagen/id3/_tags.py old mode 100755 new mode 100644 index 5ffca7bc..1774fd3c --- a/lib/mutagen/id3/_tags.py +++ b/lib/mutagen/id3/_tags.py @@ -7,11 +7,12 @@ # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
+import re import struct +from itertools import zip_longest from mutagen._tags import Tags from mutagen._util import DictProxy, convert_error, read_full -from mutagen._compat import PY3, text_type, itervalues from ._util import BitPaddedInt, unsynch, ID3JunkFrameError, \ ID3EncryptionUnsupportedError, is_valid_frame_id, error, \ @@ -82,10 +83,7 @@ class ID3Header(object): if self.f_extended: extsize_data = read_full(fileobj, 4) - if PY3: - frame_id = extsize_data.decode("ascii", "replace") - else: - frame_id = extsize_data + frame_id = extsize_data.decode("ascii", "replace") if frame_id in Frames: # Some tagger sets the extended header flag but @@ -131,11 +129,10 @@ def determine_bpi(data, frames, EMPTY=b"\x00" * 10): name, size, flags = struct.unpack('>4sLH', part) size = BitPaddedInt(size) o += 10 + size - if PY3: - try: - name = name.decode("ascii") - except UnicodeDecodeError: - continue + try: + name = name.decode("ascii") + except UnicodeDecodeError: + continue if name in frames: asbpi += 1 else: @@ -151,11 +148,10 @@ def determine_bpi(data, frames, EMPTY=b"\x00" * 10): break name, size, flags = struct.unpack('>4sLH', part) o += 10 + size - if PY3: - try: - name = name.decode("ascii") - except UnicodeDecodeError: - continue + try: + name = name.decode("ascii") + except UnicodeDecodeError: + continue if name in frames: asint += 1 else: @@ -191,7 +187,7 @@ class ID3Tags(DictProxy, Tags): order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"] framedata = [ - (f, save_frame(f, config=config)) for f in itervalues(self)] + (f, save_frame(f, config=config)) for f in self.values()] def get_prio(frame): try: @@ -236,14 +232,14 @@ class ID3Tags(DictProxy, Tags): return [self[key]] else: key = key + ":" - return [v for s, v in list(self.items()) if s.startswith(key)] + return [v for s, v in self.items() if s.startswith(key)] def setall(self, key, values): """Delete frames of the given type and add frames in 'values'. 
Args: key (text): key for frames to delete - values (List[`Frame`]): frames to add + values (list[Frame]): frames to add """ self.delall(key) @@ -280,7 +276,7 @@ class ID3Tags(DictProxy, Tags): ``POPM=user@example.org=3 128/255`` """ - frames = sorted(Frame.pprint(s) for s in list(self.values())) + frames = sorted(Frame.pprint(s) for s in self.values()) return "\n".join(frames) def _add(self, frame, strict): @@ -369,24 +365,23 @@ class ID3Tags(DictProxy, Tags): self.__update_common() # TDAT, TYER, and TIME have been turned into TDRC. - try: - date = text_type(self.get("TYER", "")) - if date.strip("\x00"): - self.pop("TYER") - dat = text_type(self.get("TDAT", "")) - if dat.strip("\x00"): - self.pop("TDAT") - date = "%s-%s-%s" % (date, dat[2:], dat[:2]) - time = text_type(self.get("TIME", "")) - if time.strip("\x00"): - self.pop("TIME") - date += "T%s:%s:00" % (time[:2], time[2:]) - if "TDRC" not in self: - self.add(TDRC(encoding=0, text=date)) - except UnicodeDecodeError: - # Old ID3 tags have *lots* of Unicode problems, so if TYER - # is bad, just chuck the frames. - pass + timestamps = [] + old_frames = [self.pop(n, []) for n in ["TYER", "TDAT", "TIME"]] + for y, d, t in zip_longest(*old_frames, fillvalue=u""): + ym = re.match(r"([0-9]+)\Z", y) + dm = re.match(r"([0-9]{2})([0-9]{2})\Z", d) + tm = re.match(r"([0-9]{2})([0-9]{2})\Z", t) + timestamp = "" + if ym: + timestamp += u"%s" % ym.groups() + if dm: + timestamp += u"-%s-%s" % dm.groups()[::-1] + if tm: + timestamp += u"T%s:%s:00" % tm.groups() + if timestamp: + timestamps.append(timestamp) + if timestamps and "TDRC" not in self: + self.add(TDRC(encoding=0, text=timestamps)) # TORY can be the first part of a TDOR. 
if "TORY" in self: @@ -482,7 +477,7 @@ class ID3Tags(DictProxy, Tags): def _copy(self): """Creates a shallow copy of all tags""" - items = list(self.items()) + items = self.items() subs = {} for f in (self.getall("CHAP") + self.getall("CTOC")): subs[f.HashKey] = f.sub_frames._copy() @@ -533,8 +528,7 @@ def save_frame(frame, name=None, config=None): frame_name = name else: frame_name = type(frame).__name__ - if PY3: - frame_name = frame_name.encode("ascii") + frame_name = frame_name.encode("ascii") header = struct.pack('>4s4sH', frame_name, datasize, flags) return header + framedata @@ -575,11 +569,10 @@ def read_frames(id3, data, frames): if size == 0: continue # drop empty frames - if PY3: - try: - name = name.decode('ascii') - except UnicodeDecodeError: - continue + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue try: # someone writes 2.3 frames with 2.2 names @@ -614,11 +607,10 @@ def read_frames(id3, data, frames): if size == 0: continue # drop empty frames - if PY3: - try: - name = name.decode('ascii') - except UnicodeDecodeError: - continue + try: + name = name.decode('ascii') + except UnicodeDecodeError: + continue try: tag = frames[name] diff --git a/lib/mutagen/id3/_util.py b/lib/mutagen/id3/_util.py old mode 100755 new mode 100644 index 93bb264e..76932b26 --- a/lib/mutagen/id3/_util.py +++ b/lib/mutagen/id3/_util.py @@ -8,7 +8,6 @@ # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
-from mutagen._compat import long_, integer_types, PY3 from mutagen._util import MutagenError @@ -110,7 +109,7 @@ class _BitPaddedMixin(object): mask = (((1 << (8 - bits)) - 1) << bits) - if isinstance(value, integer_types): + if isinstance(value, int): while value: if value & mask: return False @@ -133,7 +132,7 @@ class BitPaddedInt(int, _BitPaddedMixin): numeric_value = 0 shift = 0 - if isinstance(value, integer_types): + if isinstance(value, int): if value < 0: raise ValueError while value: @@ -149,21 +148,12 @@ class BitPaddedInt(int, _BitPaddedMixin): else: raise TypeError - if isinstance(numeric_value, int): - self = int.__new__(BitPaddedInt, numeric_value) - else: - self = long_.__new__(BitPaddedLong, numeric_value) + self = int.__new__(BitPaddedInt, numeric_value) self.bits = bits self.bigendian = bigendian return self -if PY3: - BitPaddedLong = BitPaddedInt -else: - class BitPaddedLong(long_, _BitPaddedMixin): - pass - class ID3BadUnsynchData(error, ValueError): """Deprecated""" diff --git a/lib/mutagen/m4a.py b/lib/mutagen/m4a.py old mode 100755 new mode 100644 index 5830e8b6..c7583f8e --- a/lib/mutagen/m4a.py +++ b/lib/mutagen/m4a.py @@ -66,7 +66,7 @@ class M4ATags(DictProxy, Tags): raise error("deprecated") def pprint(self): - return "" + return u"" class M4AInfo(StreamInfo): @@ -77,7 +77,7 @@ class M4AInfo(StreamInfo): raise error("deprecated") def pprint(self): - return "" + return u"" class M4A(FileType): diff --git a/lib/mutagen/monkeysaudio.py b/lib/mutagen/monkeysaudio.py old mode 100755 new mode 100644 index 5c6bdb50..6d5d021c --- a/lib/mutagen/monkeysaudio.py +++ b/lib/mutagen/monkeysaudio.py @@ -18,10 +18,9 @@ __all__ = ["MonkeysAudio", "Open", "delete"] import struct -from ._compat import endswith from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete -from mutagen._util import cdata, convert_error +from mutagen._util import cdata, convert_error, endswith class MonkeysAudioHeaderError(error): @@ -66,6 +65,9 @@ class 
MonkeysAudioInfo(StreamInfo): blocks_per_frame = 73728 else: blocks_per_frame = 9216 + self.bits_per_sample = 0 + if header[48:].startswith(b"WAVEfmt"): + self.bits_per_sample = struct.unpack("<H", header[74:76])[0] self.version /= 1000.0 self.length = 0.0 if (self.sample_rate != 0) and (total_frames > 0): @@ -74,7 +76,7 @@ class MonkeysAudioInfo(StreamInfo): self.length = float(total_blocks) / self.sample_rate def pprint(self): - return "Monkey's Audio %.2f, %.2f seconds, %d Hz" % ( + return u"Monkey's Audio %.2f, %.2f seconds, %d Hz" % ( self.version, self.length, self.sample_rate) diff --git a/lib/mutagen/mp3/__init__.py b/lib/mutagen/mp3/__init__.py old mode 100755 new mode 100644 index ac4e2e2b..70a3891a --- a/lib/mutagen/mp3/__init__.py +++ b/lib/mutagen/mp3/__init__.py @@ -12,8 +12,7 @@ import struct from mutagen import StreamInfo from mutagen._util import MutagenError, enum, BitReader, BitReaderError, \ - convert_error -from mutagen._compat import endswith, xrange + convert_error, intround, endswith from mutagen.id3 import ID3FileType, delete from mutagen.id3._util import BitPaddedInt @@ -165,11 +164,13 @@ class MPEGFrame(object): # Try to find/parse the Xing header, which trumps the above length # and bitrate calculation. if self.layer == 3: - self._parse_vbr_header(fileobj, self.frame_offset, frame_size) + self._parse_vbr_header(fileobj, self.frame_offset, frame_size, + frame_length) fileobj.seek(self.frame_offset + frame_length, 0) - def _parse_vbr_header(self, fileobj, frame_offset, frame_size): + def _parse_vbr_header(self, fileobj, frame_offset, frame_size, + frame_length): """Does not raise""" # Xing @@ -186,6 +187,12 @@ class MPEGFrame(object): self.encoder_settings = xing.get_encoder_settings() if xing.frames != -1: samples = frame_size * xing.frames + if xing.bytes != -1 and samples > 0: + # the first frame is only included in xing.bytes but + # not in xing.frames, skip it. 
+ audio_bytes = max(0, xing.bytes - frame_length) + self.bitrate = intround(( + audio_bytes * 8 * self.sample_rate) / float(samples)) if lame is not None: samples -= lame.encoder_delay_start samples -= lame.encoder_padding_end @@ -194,10 +201,8 @@ class MPEGFrame(object): # files with low bitrate samples = 0 self.length = float(samples) / self.sample_rate - if xing.bytes != -1 and self.length: - self.bitrate = int((xing.bytes * 8) / self.length) if xing.lame_version_desc: - self.encoder_info = "LAME %s" % xing.lame_version_desc + self.encoder_info = u"LAME %s" % xing.lame_version_desc if lame is not None: self.track_gain = lame.track_gain_adjustment self.track_peak = lame.track_peak @@ -213,7 +218,7 @@ class MPEGFrame(object): pass else: self.bitrate_mode = BitrateMode.VBR - self.encoder_info = "FhG" + self.encoder_info = u"FhG" self.sketchy = False self.length = float(frame_size * vbri.frames) / self.sample_rate if self.length: @@ -297,8 +302,10 @@ class MPEGInfo(StreamInfo): Attributes: length (`float`): audio length, in seconds channels (`int`): number of audio channels - bitrate (`int`): audio bitrate, in bits per second - sample_rate (`int`) audio sample rate, in Hz + bitrate (`int`): audio bitrate, in bits per second. + In case :attr:`bitrate_mode` is :attr:`BitrateMode.UNKNOWN` the + bitrate is guessed based on the first frame. + sample_rate (`int`): audio sample rate, in Hz encoder_info (`mutagen.text`): a string containing encoder name and possibly version. 
In case a lame tag is present this will start with ``"LAME "``, if unknown it is empty, otherwise the @@ -318,13 +325,12 @@ class MPEGInfo(StreamInfo): layer (`int`): 1, 2, or 3 mode (`int`): One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3) protected (`bool`): whether or not the file is "protected" - padding (`bool`) whether or not audio frames are padded sketchy (`bool`): if true, the file may not be valid MPEG audio """ sketchy = False - encoder_info = "" - encoder_settings = "" + encoder_info = u"" + encoder_settings = u"" bitrate_mode = BitrateMode.UNKNOWN track_gain = track_peak = album_gain = album_peak = None @@ -350,7 +356,7 @@ class MPEGInfo(StreamInfo): # find a sync in the first 1024K, give up after some invalid syncs max_read = 1024 * 1024 - max_syncs = 1000 + max_syncs = 1500 enough_frames = 4 min_frames = 2 @@ -411,16 +417,16 @@ class MPEGInfo(StreamInfo): def pprint(self): info = str(self.bitrate_mode).split(".", 1)[-1] if self.bitrate_mode == BitrateMode.UNKNOWN: - info = "CBR?" + info = u"CBR?" 
if self.encoder_info: info += ", %s" % self.encoder_info if self.encoder_settings: info += ", %s" % self.encoder_settings - s = "MPEG %s layer %d, %d bps (%s), %s Hz, %d chn, %.2f seconds" % ( + s = u"MPEG %s layer %d, %d bps (%s), %s Hz, %d chn, %.2f seconds" % ( self.version, self.layer, self.bitrate, info, self.sample_rate, self.channels, self.length) if self.sketchy: - s += " (sketchy)" + s += u" (sketchy)" return s diff --git a/lib/mutagen/mp3/_util.py b/lib/mutagen/mp3/_util.py old mode 100755 new mode 100644 index 973b2101..96d897d6 --- a/lib/mutagen/mp3/_util.py +++ b/lib/mutagen/mp3/_util.py @@ -11,10 +11,11 @@ http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header http://wiki.hydrogenaud.io/index.php?title=MP3 """ +from __future__ import division from functools import partial +from io import BytesIO -from mutagen._util import cdata, BitReader -from mutagen._compat import xrange, iterbytes, cBytesIO +from mutagen._util import cdata, BitReader, iterbytes class LAMEError(Exception): @@ -37,7 +38,9 @@ class LAMEHeader(object): """VBR quality: 0..9""" track_peak = None - """Peak signal amplitude as float. None if unknown.""" + """Peak signal amplitude as float. 1.0 is maximal signal amplitude + in decoded format. None if unknown. 
+ """ track_gain_origin = 0 """see the docs""" @@ -106,7 +109,7 @@ class LAMEHeader(object): raise LAMEError("Not enough data") # extended lame header - r = BitReader(cBytesIO(payload)) + r = BitReader(BytesIO(payload)) revision = r.bits(4) if revision != 0: raise LAMEError("unsupported header revision %d" % revision) @@ -123,8 +126,7 @@ class LAMEHeader(object): self.track_peak = None else: # see PutLameVBR() in LAME's VbrTag.c - self.track_peak = ( - cdata.uint32_be(track_peak_data) - 0.5) / 2 ** 23 + self.track_peak = cdata.uint32_be(track_peak_data) / 2 ** 23 track_gain_type = r.bits(3) self.track_gain_origin = r.bits(3) sign = r.bits(1) @@ -194,64 +196,64 @@ class LAMEHeader(object): if self.vbr_method == 2: if version in ((3, 90), (3, 91), (3, 92)) and self.encoding_flags: if self.bitrate < 255: - return "--alt-preset %d" % self.bitrate + return u"--alt-preset %d" % self.bitrate else: - return "--alt-preset %d+" % self.bitrate + return u"--alt-preset %d+" % self.bitrate if self.preset_used != 0: - return "--preset %d" % self.preset_used + return u"--preset %d" % self.preset_used elif self.bitrate < 255: - return "--abr %d" % self.bitrate + return u"--abr %d" % self.bitrate else: - return "--abr %d+" % self.bitrate + return u"--abr %d+" % self.bitrate elif self.vbr_method == 1: if self.preset_used == 0: if self.bitrate < 255: - return "-b %d" % self.bitrate + return u"-b %d" % self.bitrate else: - return "-b 255+" + return u"-b 255+" elif self.preset_used == 1003: - return "--preset insane" - return "-b %d" % self.preset_used + return u"--preset insane" + return u"-b %d" % self.preset_used elif version in ((3, 90), (3, 91), (3, 92)): preset_key = (self.vbr_quality, self.quality, self.vbr_method, self.lowpass_filter, self.ath_type) if preset_key == (1, 2, 4, 19500, 3): - return "--preset r3mix" + return u"--preset r3mix" if preset_key == (2, 2, 3, 19000, 4): - return "--alt-preset standard" + return u"--alt-preset standard" if preset_key == (2, 2, 3, 19500, 2): 
- return "--alt-preset extreme" + return u"--alt-preset extreme" if self.vbr_method == 3: - return "-V %s" % self.vbr_quality + return u"-V %s" % self.vbr_quality elif self.vbr_method in (4, 5): - return "-V %s --vbr-new" % self.vbr_quality + return u"-V %s --vbr-new" % self.vbr_quality elif version in ((3, 93), (3, 94), (3, 95), (3, 96), (3, 97)): if self.preset_used == 1001: - return "--preset standard" + return u"--preset standard" elif self.preset_used == 1002: - return "--preset extreme" + return u"--preset extreme" elif self.preset_used == 1004: - return "--preset fast standard" + return u"--preset fast standard" elif self.preset_used == 1005: - return "--preset fast extreme" + return u"--preset fast extreme" elif self.preset_used == 1006: - return "--preset medium" + return u"--preset medium" elif self.preset_used == 1007: - return "--preset fast medium" + return u"--preset fast medium" if self.vbr_method == 3: - return "-V %s" % self.vbr_quality + return u"-V %s" % self.vbr_quality elif self.vbr_method in (4, 5): - return "-V %s --vbr-new" % self.vbr_quality + return u"-V %s --vbr-new" % self.vbr_quality elif version == (3, 98): if self.vbr_method == 3: - return "-V %s --vbr-old" % self.vbr_quality + return u"-V %s --vbr-old" % self.vbr_quality elif self.vbr_method in (4, 5): - return "-V %s" % self.vbr_quality + return u"-V %s" % self.vbr_quality elif version >= (3, 99): if self.vbr_method == 3: - return "-V %s --vbr-old" % self.vbr_quality + return u"-V %s --vbr-old" % self.vbr_quality elif self.vbr_method in (4, 5): p = self.vbr_quality adjust_key = (p, self.bitrate, self.lowpass_filter) @@ -261,9 +263,9 @@ class LAMEHeader(object): (5, 8, 0): 8, (6, 8, 0): 9, }.get(adjust_key, p) - return "-V %s" % p + return u"-V %s" % p - return "" + return u"" @classmethod def parse_version(cls, fileobj): @@ -303,36 +305,36 @@ class LAMEHeader(object): if (major, minor) < (3, 90) or ( (major, minor) == (3, 90) and data[-11:-10] == b"("): flag = 
data.strip(b"\x00").rstrip().decode("ascii") - return (major, minor), "%d.%d%s" % (major, minor, flag), False + return (major, minor), u"%d.%d%s" % (major, minor, flag), False if len(data) < 11: raise LAMEError("Invalid version: too long") flag = data[:-11].rstrip(b"\x00") - flag_string = "" - patch = "" + flag_string = u"" + patch = u"" if flag == b"a": - flag_string = " (alpha)" + flag_string = u" (alpha)" elif flag == b"b": - flag_string = " (beta)" + flag_string = u" (beta)" elif flag == b"r": - patch = ".1+" + patch = u".1+" elif flag == b" ": if (major, minor) > (3, 96): - patch = ".0" + patch = u".0" else: - patch = ".0+" + patch = u".0+" elif flag == b"" or flag == b".": - patch = ".0+" + patch = u".0+" else: - flag_string = " (?)" + flag_string = u" (?)" # extended header, seek back to 9 bytes for the caller fileobj.seek(-11, 1) return (major, minor), \ - "%d.%d%s%s" % (major, minor, patch, flag_string), True + u"%d.%d%s%s" % (major, minor, patch, flag_string), True class XingHeaderError(Exception): @@ -369,7 +371,7 @@ class XingHeader(object): lame_version = (0, 0) """The LAME version as two element tuple (major, minor)""" - lame_version_desc = "" + lame_version_desc = u"" """The version of the LAME encoder e.g. '3.99.0'. Empty if unknown""" is_info = False @@ -425,7 +427,7 @@ class XingHeader(object): """Returns the guessed encoder settings""" if self.lame_header is None: - return "" + return u"" return self.lame_header.guess_settings(*self.lame_version) @classmethod diff --git a/lib/mutagen/mp4/__init__.py b/lib/mutagen/mp4/__init__.py old mode 100755 new mode 100644 index a74a303a..8231d65e --- a/lib/mutagen/mp4/__init__.py +++ b/lib/mutagen/mp4/__init__.py @@ -25,13 +25,15 @@ were all consulted. 
import struct import sys +from io import BytesIO +from collections.abc import Sequence +from datetime import timedelta from mutagen import FileType, Tags, StreamInfo, PaddingInfo from mutagen._constants import GENRES from mutagen._util import cdata, insert_bytes, DictProxy, MutagenError, \ - hashable, enum, get_size, resize_bytes, loadfile, convert_error -from mutagen._compat import (reraise, PY2, string_types, text_type, chr_, - iteritems, PY3, cBytesIO, izip, xrange) + hashable, enum, get_size, resize_bytes, loadfile, convert_error, bchr, \ + reraise from ._atom import Atoms, Atom, AtomError from ._util import parse_full_atom from ._as_entry import AudioSampleEntry, ASEntryError @@ -205,14 +207,10 @@ class MP4FreeForm(bytes): def _name2key(name): - if PY2: - return name return name.decode("latin-1") def _key2name(key): - if PY2: - return key return key.encode("latin-1") @@ -290,6 +288,8 @@ class MP4Tags(DictProxy, Tags): * 'soco' -- composer sort order * 'sosn' -- show sort order * 'tvsh' -- show name + * '\\xa9wrk' -- work + * '\\xa9mvn' -- movement Boolean values: @@ -309,6 +309,7 @@ class MP4Tags(DictProxy, Tags): * '\\xa9mvi' -- Movement Index * 'shwm' -- work/movement * 'stik' -- Media Kind + * 'hdvd' -- HD Video * 'rtng' -- Content Rating * 'tves' -- TV Episode * 'tvsn' -- TV Season @@ -390,17 +391,17 @@ class MP4Tags(DictProxy, Tags): @convert_error(IOError, error) @loadfile(writable=True) - def save(self, filething, padding=None): + def save(self, filething=None, padding=None): values = [] - items = sorted(list(self.items()), key=lambda kv: _item_sort_key(*kv)) + items = sorted(self.items(), key=lambda kv: _item_sort_key(*kv)) for key, value in items: try: values.append(self._render(key, value)) except (TypeError, ValueError) as s: reraise(MP4MetadataValueError, s, sys.exc_info()[2]) - for key, failed in iteritems(self._failed_atoms): + for key, failed in self._failed_atoms.items(): # don't write atoms back if we have added a new one with # the same name, 
this excludes freeform which can have # multiple atoms with the same key (most parsers seem to be able @@ -560,6 +561,9 @@ class MP4Tags(DictProxy, Tags): if len(head) != 12: raise MP4MetadataError("truncated atom % r" % atom.name) length, name = struct.unpack(">I4s", head[:8]) + if length < 1: + raise MP4MetadataError( + "atom %r has a length of zero" % atom.name) version = ord(head[8:9]) flags = struct.unpack(">I", b"\x00" + head[9:12])[0] if name != b"data": @@ -599,7 +603,9 @@ class MP4Tags(DictProxy, Tags): if atom_name != b"data": raise MP4MetadataError( "unexpected atom %r inside %r" % (atom_name, atom.name)) - + if length < 1: + raise MP4MetadataError( + "atom %r has a length of zero" % atom.name) version = ord(data[pos + 8:pos + 8 + 1]) flags = struct.unpack(">I", b"\x00" + data[pos + 9:pos + 12])[0] value.append(MP4FreeForm(data[pos + 16:pos + length], @@ -714,7 +720,8 @@ class MP4Tags(DictProxy, Tags): # by itunes for compatibility. if cdata.int8_min <= v <= cdata.int8_max and min_bytes <= 1: data = cdata.to_int8(v) - if cdata.int16_min <= v <= cdata.int16_max and min_bytes <= 2: + elif cdata.int16_min <= v <= cdata.int16_max and \ + min_bytes <= 2: data = cdata.to_int16_be(v) elif cdata.int32_min <= v <= cdata.int32_max and \ min_bytes <= 4: @@ -743,7 +750,7 @@ class MP4Tags(DictProxy, Tags): def __render_bool(self, key, value): return self.__render_data( - key, 0, AtomDataType.INTEGER, [chr_(bool(value))]) + key, 0, AtomDataType.INTEGER, [bchr(bool(value))]) def __parse_cover(self, atom, data): values = [] @@ -757,6 +764,9 @@ class MP4Tags(DictProxy, Tags): continue raise MP4MetadataError( "unexpected atom %r inside 'covr'" % name) + if length < 1: + raise MP4MetadataError( + "atom %r has a length of zero" % atom.name) if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG): # Sometimes AtomDataType.IMPLICIT or simply wrong. 
# In all cases it was jpeg, so default to it @@ -804,18 +814,14 @@ class MP4Tags(DictProxy, Tags): self.__add(key, values) def __render_text(self, key, value, flags=AtomDataType.UTF8): - if isinstance(value, string_types): + if isinstance(value, str): value = [value] encoded = [] for v in value: - if not isinstance(v, text_type): - if PY3: - raise TypeError("%r not str" % v) - try: - v = v.decode("utf-8") - except (AttributeError, UnicodeDecodeError) as e: - raise TypeError(e) + if not isinstance(v, str): + raise TypeError("%r not str" % v) + encoded.append(v.encode("utf-8")) return self.__render_data(key, 0, flags, encoded) @@ -849,6 +855,7 @@ class MP4Tags(DictProxy, Tags): b"pcst": (__parse_bool, __render_bool), b"shwm": (__parse_integer, __render_integer, 1), b"stik": (__parse_integer, __render_integer, 1), + b"hdvd": (__parse_integer, __render_integer, 1), b"rtng": (__parse_integer, __render_integer, 1), b"covr": (__parse_cover, __render_cover), b"purl": (__parse_text, __render_text), @@ -866,24 +873,141 @@ class MP4Tags(DictProxy, Tags): def pprint(self): def to_line(key, value): - assert isinstance(key, text_type) - if isinstance(value, text_type): - return "%s=%s" % (key, value) - return "%s=%r" % (key, value) + assert isinstance(key, str) + if isinstance(value, str): + return u"%s=%s" % (key, value) + return u"%s=%r" % (key, value) values = [] - for key, value in sorted(iteritems(self)): - if not isinstance(key, text_type): + for key, value in sorted(self.items()): + if not isinstance(key, str): key = key.decode("latin-1") if key == "covr": - values.append("%s=%s" % (key, ", ".join( - ["[%d bytes of data]" % len(data) for data in value]))) + values.append(u"%s=%s" % (key, u", ".join( + [u"[%d bytes of data]" % len(data) for data in value]))) elif isinstance(value, list): for v in value: values.append(to_line(key, v)) else: values.append(to_line(key, value)) - return "\n".join(values) + return u"\n".join(values) + + +class Chapter(object): + """Chapter() + 
+ Chapter information container + """ + def __init__(self, start, title): + self.start = start + self.title = title + + +class MP4Chapters(Sequence): + """MP4Chapters() + + MPEG-4 Chapter information. + + Supports the 'moov.udta.chpl' box. + + A sequence of Chapter objects with the following members: + start (`float`): position from the start of the file in seconds + title (`str`): title of the chapter + + """ + + def __init__(self, *args, **kwargs): + self._timescale = None + self._duration = None + self._chapters = [] + super(MP4Chapters, self).__init__() + if args or kwargs: + self.load(*args, **kwargs) + + def __len__(self): + return self._chapters.__len__() + + def __getitem__(self, key): + return self._chapters.__getitem__(key) + + def load(self, atoms, fileobj): + try: + mvhd = atoms.path(b"moov", b"mvhd")[-1] + except KeyError as key: + return MP4MetadataError(key) + + self._parse_mvhd(mvhd, fileobj) + + if not self._timescale: + raise MP4MetadataError("Unable to get timescale") + + try: + chpl = atoms.path(b"moov", b"udta", b"chpl")[-1] + except KeyError as key: + return MP4MetadataError(key) + + self._parse_chpl(chpl, fileobj) + + @classmethod + def _can_load(cls, atoms): + return b"moov.udta.chpl" in atoms and b"moov.mvhd" in atoms + + def _parse_mvhd(self, atom, fileobj): + assert atom.name == b"mvhd" + + ok, data = atom.read(fileobj) + if not ok: + raise MP4StreamInfoError("Invalid mvhd") + + version = data[0] + + pos = 4 + if version == 0: + pos += 8 # created, modified + + self._timescale = struct.unpack(">l", data[pos:pos + 4])[0] + pos += 4 + + self._duration = struct.unpack(">l", data[pos:pos + 4])[0] + pos += 4 + elif version == 1: + pos += 16 # created, modified + + self._timescale = struct.unpack(">l", data[pos:pos + 4])[0] + pos += 4 + + self._duration = struct.unpack(">q", data[pos:pos + 8])[0] + pos += 8 + + def _parse_chpl(self, atom, fileobj): + assert atom.name == b"chpl" + + ok, data = atom.read(fileobj) + if not ok: + raise 
MP4StreamInfoError("Invalid atom") + + chapters = data[8] + + pos = 9 + for i in range(chapters): + start = struct.unpack(">Q", data[pos:pos + 8])[0] / 10000 + pos += 8 + + title_len = data[pos] + pos += 1 + + try: + title = data[pos:pos + title_len].decode() + except UnicodeDecodeError as e: + raise MP4MetadataError("chapter %d title: %s" % (i, e)) + pos += title_len + + self._chapters.append(Chapter(start / self._timescale, title)) + + def pprint(self): + chapters = ["%s %s" % (timedelta(seconds=chapter.start), chapter.title) + for chapter in self._chapters] + return "chapters=%s" % '\n '.join(chapters) class MP4Info(StreamInfo): @@ -915,8 +1039,8 @@ class MP4Info(StreamInfo): channels = 0 sample_rate = 0 bits_per_sample = 0 - codec = "" - codec_description = "" + codec = u"" + codec_description = u"" def __init__(self, *args, **kwargs): if args or kwargs: @@ -1001,7 +1125,7 @@ class MP4Info(StreamInfo): return # look at the first entry if there is one - entry_fileobj = cBytesIO(data[offset:]) + entry_fileobj = BytesIO(data[offset:]) try: entry_atom = Atom(entry_fileobj) except AtomError as e: @@ -1041,6 +1165,7 @@ class MP4(FileType): """ MP4Tags = MP4Tags + MP4Chapters = MP4Chapters _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] @@ -1065,7 +1190,6 @@ class MP4(FileType): if not MP4Tags._can_load(atoms): self.tags = None - self._padding = 0 else: try: self.tags = self.MP4Tags(atoms, fileobj) @@ -1073,14 +1197,51 @@ class MP4(FileType): raise except Exception as err: reraise(MP4MetadataError, err, sys.exc_info()[2]) - else: - self._padding = self.tags._padding + + if not MP4Chapters._can_load(atoms): + self.chapters = None + else: + try: + self.chapters = self.MP4Chapters(atoms, fileobj) + except error: + raise + except Exception as err: + reraise(MP4MetadataError, err, sys.exc_info()[2]) + + @property + def _padding(self): + if self.tags is None: + return 0 + else: + return self.tags._padding def save(self, *args, **kwargs): 
"""save(filething=None, padding=None)""" super(MP4, self).save(*args, **kwargs) + def pprint(self): + """ + Returns: + text: stream information, comment key=value pairs and chapters. + """ + stream = "%s (%s)" % (self.info.pprint(), self.mime[0]) + try: + tags = self.tags.pprint() + except AttributeError: + pass + else: + stream += ((tags and "\n" + tags) or "") + + try: + chapters = self.chapters.pprint() + except AttributeError: + pass + else: + stream += "\n" + chapters + + return stream + def add_tags(self): if self.tags is None: self.tags = self.MP4Tags() diff --git a/lib/mutagen/mp4/_as_entry.py b/lib/mutagen/mp4/_as_entry.py old mode 100755 new mode 100644 index e2408351..9ee60c44 --- a/lib/mutagen/mp4/_as_entry.py +++ b/lib/mutagen/mp4/_as_entry.py @@ -6,10 +6,10 @@ # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -from mutagen._compat import cBytesIO, xrange +from io import BytesIO + from mutagen.aac import ProgramConfigElement from mutagen._util import BitReader, BitReaderError, cdata -from mutagen._compat import text_type from ._util import parse_full_atom from ._atom import Atom, AtomError @@ -47,7 +47,7 @@ class AudioSampleEntry(object): if not ok: raise ASEntryError("too short %r atom" % atom.name) - fileobj = cBytesIO(data) + fileobj = BytesIO(data) r = BitReader(fileobj) try: @@ -93,7 +93,7 @@ class AudioSampleEntry(object): ok, data = atom.read(fileobj) if not ok: raise ASEntryError("truncated %s atom" % atom.name) - fileobj = cBytesIO(data) + fileobj = BytesIO(data) r = BitReader(fileobj) # sample_rate in AudioSampleEntry covers values in @@ -134,7 +134,7 @@ class AudioSampleEntry(object): if version != 0: raise ASEntryError("Unsupported version %d" % version) - fileobj = cBytesIO(data) + fileobj = BytesIO(data) r = BitReader(fileobj) try: @@ -168,7 +168,7 @@ class AudioSampleEntry(object): if version != 0: raise ASEntryError("Unsupported version %d" % version) - fileobj = cBytesIO(data) + 
fileobj = BytesIO(data) r = BitReader(fileobj) try: @@ -239,9 +239,13 @@ class BaseDescriptor(object): pos = fileobj.tell() instance = cls(fileobj, length) left = length - (fileobj.tell() - pos) - if left < 0: - raise DescriptorError("descriptor parsing read too much data") - fileobj.seek(left, 1) + if left > 0: + fileobj.seek(left, 1) + else: + # XXX: In case the instance length is shorted than the content + # assume the size is wrong and just continue parsing + # https://github.com/quodlibet/mutagen/issues/444 + pass return instance @@ -318,10 +322,10 @@ class DecoderConfigDescriptor(BaseDescriptor): def codec_param(self): """string""" - param = ".%X" % self.objectTypeIndication + param = u".%X" % self.objectTypeIndication info = self.decSpecificInfo if info is not None: - param += ".%d" % info.audioObjectType + param += u".%d" % info.audioObjectType return param @property @@ -371,7 +375,7 @@ class DecoderSpecificInfo(BaseDescriptor): name += "+SBR" if self.psPresentFlag == 1: name += "+PS" - return text_type(name) + return str(name) @property def sample_rate(self): diff --git a/lib/mutagen/mp4/_atom.py b/lib/mutagen/mp4/_atom.py old mode 100755 new mode 100644 index 4ff4f7b5..e755d39b --- a/lib/mutagen/mp4/_atom.py +++ b/lib/mutagen/mp4/_atom.py @@ -8,7 +8,6 @@ import struct -from mutagen._compat import PY2 from mutagen._util import convert_error # This is not an exhaustive list of container atoms, but just the @@ -180,12 +179,8 @@ class Atoms(object): specifying the complete path ('moov.udta'). 
""" - if PY2: - if isinstance(names, str): - names = names.split(b".") - else: - if isinstance(names, bytes): - names = names.split(b".") + if isinstance(names, bytes): + names = names.split(b".") for child in self.atoms: if child.name == names[0]: diff --git a/lib/mutagen/mp4/_util.py b/lib/mutagen/mp4/_util.py old mode 100755 new mode 100644 diff --git a/lib/mutagen/musepack.py b/lib/mutagen/musepack.py old mode 100755 new mode 100644 index 2c088b61..200c5f7a --- a/lib/mutagen/musepack.py +++ b/lib/mutagen/musepack.py @@ -19,11 +19,10 @@ __all__ = ["Musepack", "Open", "delete"] import struct -from ._compat import endswith, xrange from mutagen import StreamInfo from mutagen.apev2 import APEv2File, error, delete from mutagen.id3._util import BitPaddedInt -from mutagen._util import cdata, convert_error +from mutagen._util import cdata, convert_error, intround, endswith class MusepackHeaderError(error): @@ -118,7 +117,7 @@ class MusepackInfo(StreamInfo): if not self.bitrate and self.length != 0: fileobj.seek(0, 2) - self.bitrate = int(round(fileobj.tell() * 8 / self.length)) + self.bitrate = intround(fileobj.tell() * 8 / self.length) def __parse_sv8(self, fileobj): # SV8 http://trac.musepack.net/trac/wiki/SV8Specification @@ -143,9 +142,13 @@ class MusepackInfo(StreamInfo): # packets can be at maximum data_size big and are padded with zeros if frame_type == b"SH": + if frame_type not in mandatory_packets: + raise MusepackHeaderError("Duplicate SH packet") mandatory_packets.remove(frame_type) self.__parse_stream_header(fileobj, data_size) elif frame_type == b"RG": + if frame_type not in mandatory_packets: + raise MusepackHeaderError("Duplicate RG packet") mandatory_packets.remove(frame_type) self.__parse_replaygain_packet(fileobj, data_size) else: @@ -184,9 +187,13 @@ class MusepackInfo(StreamInfo): remaining_size -= l1 + l2 data = fileobj.read(remaining_size) - if len(data) != remaining_size: + if len(data) != remaining_size or len(data) < 2: raise 
MusepackHeaderError("SH packet ended unexpectedly.") - self.sample_rate = RATES[bytearray(data)[0] >> 5] + rate_index = (bytearray(data)[0] >> 5) + try: + self.sample_rate = RATES[rate_index] + except IndexError: + raise MusepackHeaderError("Invalid sample rate") self.channels = (bytearray(data)[1] >> 4) + 1 def __parse_replaygain_packet(self, fileobj, data_size): @@ -253,12 +260,12 @@ class MusepackInfo(StreamInfo): def pprint(self): rg_data = [] if hasattr(self, "title_gain"): - rg_data.append("%+0.2f (title)" % self.title_gain) + rg_data.append(u"%+0.2f (title)" % self.title_gain) if hasattr(self, "album_gain"): - rg_data.append("%+0.2f (album)" % self.album_gain) + rg_data.append(u"%+0.2f (album)" % self.album_gain) rg_data = (rg_data and ", Gain: " + ", ".join(rg_data)) or "" - return "Musepack SV%d, %.2f seconds, %d Hz, %d bps%s" % ( + return u"Musepack SV%d, %.2f seconds, %d Hz, %d bps%s" % ( self.version, self.length, self.sample_rate, self.bitrate, rg_data) diff --git a/lib/mutagen/ogg.py b/lib/mutagen/ogg.py old mode 100755 new mode 100644 index dd976bb4..86ac5c2e --- a/lib/mutagen/ogg.py +++ b/lib/mutagen/ogg.py @@ -19,10 +19,11 @@ http://www.xiph.org/ogg/doc/rfc3533.txt. import struct import sys import zlib +from io import BytesIO from mutagen import FileType -from mutagen._util import cdata, resize_bytes, MutagenError, loadfile, seek_end -from ._compat import cBytesIO, reraise, chr_, izip, xrange +from mutagen._util import cdata, resize_bytes, MutagenError, loadfile, \ + seek_end, bchr, reraise class error(MutagenError): @@ -37,7 +38,7 @@ class OggPage(object): A page is a header of 26 bytes, followed by the length of the data, followed by the data. - The constructor is givin a file-like object pointing to the start + The constructor is given a file-like object pointing to the start of an Ogg page. After the constructor is finished it is pointing to the start of the next page. 
@@ -50,7 +51,7 @@ class OggPage(object): offset (`int` or `None`): offset this page was read from (default None) complete (`bool`): if the last packet on this page is complete (default True) - packets (List[`bytes`]): list of raw packet data (default []) + packets (list[bytes]): list of raw packet data (default []) Note that if 'complete' is false, the next page's 'continued' property must be true (so set both when constructing pages). @@ -145,11 +146,11 @@ class OggPage(object): lacing_data = [] for datum in self.packets: quot, rem = divmod(len(datum), 255) - lacing_data.append(b"\xff" * quot + chr_(rem)) + lacing_data.append(b"\xff" * quot + bchr(rem)) lacing_data = b"".join(lacing_data) if not self.complete and lacing_data.endswith(b"\x00"): lacing_data = lacing_data[:-1] - data.append(chr_(len(lacing_data))) + data.append(bchr(len(lacing_data))) data.append(lacing_data) data.extend(self.packets) data = b"".join(data) @@ -216,7 +217,7 @@ class OggPage(object): so also the CRC. If an error occurs (e.g. non-Ogg data is found), fileobj will - be left pointing to the place in the stream the error occured, + be left pointing to the place in the stream the error occurred, but the invalid data will be left intact (since this function does not change the total file size). """ @@ -267,11 +268,12 @@ class OggPage(object): else: sequence += 1 - if page.continued: - packets[-1].append(page.packets[0]) - else: - packets.append([page.packets[0]]) - packets.extend([p] for p in page.packets[1:]) + if page.packets: + if page.continued: + packets[-1].append(page.packets[0]) + else: + packets.append([page.packets[0]]) + packets.extend([p] for p in page.packets[1:]) return [b"".join(p) for p in packets] @@ -388,7 +390,7 @@ class OggPage(object): # Number the new pages starting from the first old page. 
first = old_pages[0].sequence for page, seq in zip(new_pages, - range(first, first + len(new_pages))): + range(first, first + len(new_pages))): page.sequence = seq page.serial = old_pages[0].serial @@ -434,7 +436,7 @@ class OggPage(object): cls.renumber(fileobj, serial, sequence) @staticmethod - def find_last(fileobj, serial): + def find_last(fileobj, serial, finishing=False): """Find the last page of the stream 'serial'. If the file is not multiplexed this function is fast. If it is, @@ -443,6 +445,10 @@ class OggPage(object): This finds the last page in the actual file object, or the last page in the stream (with eos set), whichever comes first. + If finishing is True it returns the last page which contains a packet + finishing on it. If there exist pages but none with finishing packets + returns None. + Returns None in case no page with the serial exists. Raises error in case this isn't a valid ogg stream. Raises IOError. @@ -456,14 +462,18 @@ class OggPage(object): index = data.rindex(b"OggS") except ValueError: raise error("unable to find final Ogg header") - bytesobj = cBytesIO(data[index:]) + bytesobj = BytesIO(data[index:]) + + def is_valid(page): + return not finishing or page.position != -1 + best_page = None try: page = OggPage(bytesobj) except error: pass else: - if page.serial == serial: + if page.serial == serial and is_valid(page): if page.last: return page else: @@ -475,12 +485,14 @@ class OggPage(object): fileobj.seek(0) try: page = OggPage(fileobj) - while not page.last: + while True: + if page.serial == serial: + if is_valid(page): + best_page = page + if page.last: + break page = OggPage(fileobj) - while page.serial != serial: - page = OggPage(fileobj) - best_page = page - return page + return best_page except error: return best_page except EOFError: @@ -525,7 +537,7 @@ class OggFileType(FileType): raise self._Error("no appropriate stream found") @loadfile(writable=True) - def delete(self, filething): + def delete(self, filething=None): 
"""delete(filething=None) Remove tags from a file. @@ -557,7 +569,7 @@ class OggFileType(FileType): raise self._Error @loadfile(writable=True) - def save(self, filething, padding=None): + def save(self, filething=None, padding=None): """save(filething=None, padding=None) Save a tag to a file. @@ -566,7 +578,7 @@ class OggFileType(FileType): Args: filething (filething) - padding (PaddingFunction) + padding (:obj:`mutagen.PaddingFunction`) Raises: mutagen.MutagenError """ diff --git a/lib/mutagen/oggflac.py b/lib/mutagen/oggflac.py old mode 100755 new mode 100644 index 029c4638..fe17a28f --- a/lib/mutagen/oggflac.py +++ b/lib/mutagen/oggflac.py @@ -18,8 +18,7 @@ http://flac.sourceforge.net/ogg_mapping.html. __all__ = ["OggFLAC", "Open", "delete"] import struct - -from ._compat import cBytesIO +from io import BytesIO from mutagen import StreamInfo from mutagen.flac import StreamInfo as FLACStreamInfo, error as FLACError @@ -65,7 +64,7 @@ class OggFLACStreamInfo(StreamInfo): self.serial = page.serial # Skip over the block header. 
- stringobj = cBytesIO(page.packets[0][17:]) + stringobj = BytesIO(page.packets[0][17:]) try: flac_info = FLACStreamInfo(stringobj) @@ -79,11 +78,13 @@ class OggFLACStreamInfo(StreamInfo): def _post_tags(self, fileobj): if self.length: return - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) + if page is None: + raise OggFLACHeaderError self.length = page.position / float(self.sample_rate) def pprint(self): - return "Ogg FLAC, %.2f seconds, %d Hz" % ( + return u"Ogg FLAC, %.2f seconds, %d Hz" % ( self.length, self.sample_rate) @@ -99,7 +100,7 @@ class OggFLACVComment(VCommentDict): if page.serial == info.serial: pages.append(page) complete = page.complete or (len(page.packets) > 1) - comment = cBytesIO(OggPage.to_packets(pages)[0][4:]) + comment = BytesIO(OggPage.to_packets(pages)[0][4:]) super(OggFLACVComment, self).__init__(comment, framing=False) def _inject(self, fileobj, padding_func): diff --git a/lib/mutagen/oggopus.py b/lib/mutagen/oggopus.py old mode 100755 new mode 100644 index baca7849..e6ca9a81 --- a/lib/mutagen/oggopus.py +++ b/lib/mutagen/oggopus.py @@ -17,9 +17,9 @@ Based on http://tools.ietf.org/html/draft-terriberry-oggopus-01 __all__ = ["OggOpus", "Open", "delete"] import struct +from io import BytesIO from mutagen import StreamInfo -from mutagen._compat import BytesIO from mutagen._util import get_size, loadfile, convert_error from mutagen._tags import PaddingInfo from mutagen._vorbis import VCommentDict @@ -69,13 +69,13 @@ class OggOpusInfo(StreamInfo): raise OggOpusHeaderError("version %r unsupported" % major) def _post_tags(self, fileobj): - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) if page is None: raise OggOpusHeaderError self.length = (page.position - self.__pre_skip) / float(48000) def pprint(self): - return "Ogg Opus, %.2f seconds" % (self.length) + return u"Ogg Opus, %.2f seconds" % (self.length) class 
OggOpusVComment(VCommentDict): diff --git a/lib/mutagen/oggspeex.py b/lib/mutagen/oggspeex.py old mode 100755 new mode 100644 index 0645ba0c..de02a449 --- a/lib/mutagen/oggspeex.py +++ b/lib/mutagen/oggspeex.py @@ -64,13 +64,13 @@ class OggSpeexInfo(StreamInfo): self.serial = page.serial def _post_tags(self, fileobj): - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) if page is None: raise OggSpeexHeaderError self.length = page.position / float(self.sample_rate) def pprint(self): - return "Ogg Speex, %.2f seconds" % self.length + return u"Ogg Speex, %.2f seconds" % self.length class OggSpeexVComment(VCommentDict): diff --git a/lib/mutagen/oggtheora.py b/lib/mutagen/oggtheora.py old mode 100755 new mode 100644 index cd1fa72f..796cec72 --- a/lib/mutagen/oggtheora.py +++ b/lib/mutagen/oggtheora.py @@ -50,33 +50,39 @@ class OggTheoraInfo(StreamInfo): def __init__(self, fileobj): page = OggPage(fileobj) - while not page.packets[0].startswith(b"\x80theora"): + while not page.packets or \ + not page.packets[0].startswith(b"\x80theora"): page = OggPage(fileobj) if not page.first: raise OggTheoraHeaderError( "page has ID header, but doesn't start a stream") data = page.packets[0] + if len(data) < 42: + raise OggTheoraHeaderError("Truncated header") vmaj, vmin = struct.unpack("2B", data[7:9]) if (vmaj, vmin) != (3, 2): raise OggTheoraHeaderError( "found Theora version %d.%d != 3.2" % (vmaj, vmin)) fps_num, fps_den = struct.unpack(">2I", data[22:30]) + if not fps_den or not fps_num: + raise OggTheoraHeaderError("FRN or FRD is equal to zero") self.fps = fps_num / float(fps_den) self.bitrate = cdata.uint_be(b"\x00" + data[37:40]) self.granule_shift = (cdata.ushort_be(data[40:42]) >> 5) & 0x1F self.serial = page.serial def _post_tags(self, fileobj): - page = OggPage.find_last(fileobj, self.serial) + page = OggPage.find_last(fileobj, self.serial, finishing=True) if page is None: raise OggTheoraHeaderError position = 
page.position mask = (1 << self.granule_shift) - 1 frames = (position >> self.granule_shift) + (position & mask) + assert self.fps self.length = frames / float(self.fps) def pprint(self): - return "Ogg Theora, %.2f seconds, %d bps" % (self.length, + return u"Ogg Theora, %.2f seconds, %d bps" % (self.length, self.bitrate) @@ -91,7 +97,10 @@ class OggTheoraCommentDict(VCommentDict): if page.serial == info.serial: pages.append(page) complete = page.complete or (len(page.packets) > 1) - data = OggPage.to_packets(pages)[0][7:] + packets = OggPage.to_packets(pages) + if not packets: + raise error("Missing metadata packet") + data = packets[0][7:] super(OggTheoraCommentDict, self).__init__(data, framing=False) self._padding = len(data) - self._size @@ -100,7 +109,8 @@ class OggTheoraCommentDict(VCommentDict): fileobj.seek(0) page = OggPage(fileobj) - while not page.packets[0].startswith(b"\x81theora"): + while not page.packets or \ + not page.packets[0].startswith(b"\x81theora"): page = OggPage(fileobj) old_pages = [page] diff --git a/lib/mutagen/oggvorbis.py b/lib/mutagen/oggvorbis.py old mode 100755 new mode 100644 index 4bc75265..d73f3192 --- a/lib/mutagen/oggvorbis.py +++ b/lib/mutagen/oggvorbis.py @@ -43,7 +43,7 @@ class OggVorbisInfo(StreamInfo): length (`float`): File length in seconds, as a float channels (`int`): Number of channels bitrate (`int`): Nominal ('average') bitrate in bits per second - sample_Rate (`int`): Sample rate in Hz + sample_rate (`int`): Sample rate in Hz """ @@ -56,13 +56,20 @@ class OggVorbisInfo(StreamInfo): """Raises ogg.error, IOError""" page = OggPage(fileobj) + if not page.packets: + raise OggVorbisHeaderError("page has not packets") while not page.packets[0].startswith(b"\x01vorbis"): page = OggPage(fileobj) if not page.first: raise OggVorbisHeaderError( "page has ID header, but doesn't start a stream") + if len(page.packets[0]) < 28: + raise OggVorbisHeaderError( + "page contains a packet too short to be valid") (self.channels, 
self.sample_rate, max_bitrate, nominal_bitrate, - min_bitrate) = struct.unpack("= 15: + encoder_id = struct.unpack("> 4) + 4500) + self.encoder_info = "%s.%s" % (version[0], version[1:]) + else: + self.encoder_info = "" def pprint(self): - return "OptimFROG, %.2f seconds, %d Hz" % (self.length, + return u"OptimFROG, %.2f seconds, %d Hz" % (self.length, self.sample_rate) diff --git a/lib/mutagen/smf.py b/lib/mutagen/smf.py old mode 100755 new mode 100644 index abaa4ae8..2921ef52 --- a/lib/mutagen/smf.py +++ b/lib/mutagen/smf.py @@ -12,8 +12,7 @@ import struct from mutagen import StreamInfo, MutagenError from mutagen._file import FileType -from mutagen._util import loadfile -from mutagen._compat import xrange, endswith +from mutagen._util import loadfile, endswith class SMFError(MutagenError): @@ -36,7 +35,7 @@ def _var_int(data, offset=0): def _read_track(chunk): """Retuns a list of midi events and tempo change events""" - TEMPO, MIDI = list(range(2)) + TEMPO, MIDI = range(2) # Deviations: The running status should be reset on non midi events, but # some files contain meta events inbetween. @@ -91,7 +90,7 @@ def _read_track(chunk): def _read_midi_length(fileobj): """Returns the duration in seconds. 
Can raise all kind of errors...""" - TEMPO, MIDI = list(range(2)) + TEMPO, MIDI = range(2) def read_chunk(fileobj): info = fileobj.read(8) @@ -178,7 +177,7 @@ class SMFInfo(StreamInfo): self.length = _read_midi_length(fileobj) def pprint(self): - return "SMF, %.2f seconds" % self.length + return u"SMF, %.2f seconds" % self.length class SMF(FileType): diff --git a/lib/mutagen/tak.py b/lib/mutagen/tak.py new file mode 100644 index 00000000..424bd881 --- /dev/null +++ b/lib/mutagen/tak.py @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2008 Lukáš Lalinský +# Copyright (C) 2019 Philipp Wolfer +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +"""Tom's lossless Audio Kompressor (TAK) streams with APEv2 tags. + +TAK is a lossless audio compressor developed by Thomas Becker. + +For more information, see: + +* http://www.thbeck.de/Tak/Tak.html +* http://wiki.hydrogenaudio.org/index.php?title=TAK +""" + +__all__ = ["TAK", "Open", "delete"] + +import struct + +from mutagen import StreamInfo +from mutagen.apev2 import ( + APEv2File, + delete, + error, +) +from mutagen._util import ( + BitReader, + BitReaderError, + convert_error, + enum, + endswith, +) + + +@enum +class TAKMetadata(object): + END = 0 + STREAM_INFO = 1 + SEEK_TABLE = 2 # Removed in TAK 1.1.1 + SIMPLE_WAVE_DATA = 3 + ENCODER_INFO = 4 + UNUSED_SPACE = 5 # New in TAK 1.0.3 + MD5 = 6 # New in TAK 1.1.1 + LAST_FRAME_INFO = 7 # New in TAK 1.1.1 + + +CRC_SIZE = 3 + +ENCODER_INFO_CODEC_BITS = 6 +ENCODER_INFO_PROFILE_BITS = 4 +ENCODER_INFO_TOTAL_BITS = ENCODER_INFO_CODEC_BITS + ENCODER_INFO_PROFILE_BITS + +SIZE_INFO_FRAME_DURATION_BITS = 4 +SIZE_INFO_SAMPLE_NUM_BITS = 35 +SIZE_INFO_TOTAL_BITS = (SIZE_INFO_FRAME_DURATION_BITS + + SIZE_INFO_SAMPLE_NUM_BITS) + +AUDIO_FORMAT_DATA_TYPE_BITS = 3 
+AUDIO_FORMAT_SAMPLE_RATE_BITS = 18 +AUDIO_FORMAT_SAMPLE_BITS_BITS = 5 +AUDIO_FORMAT_CHANNEL_NUM_BITS = 4 +AUDIO_FORMAT_HAS_EXTENSION_BITS = 1 +AUDIO_FORMAT_BITS_MIN = 31 +AUDIO_FORMAT_BITS_MAX = 31 + 102 + +SAMPLE_RATE_MIN = 6000 +SAMPLE_BITS_MIN = 8 +CHANNEL_NUM_MIN = 1 + +STREAM_INFO_BITS_MIN = (ENCODER_INFO_TOTAL_BITS + + SIZE_INFO_TOTAL_BITS + + AUDIO_FORMAT_BITS_MIN) +STREAM_INFO_BITS_MAX = (ENCODER_INFO_TOTAL_BITS + + SIZE_INFO_TOTAL_BITS + + AUDIO_FORMAT_BITS_MAX) +STREAM_INFO_SIZE_MIN = (STREAM_INFO_BITS_MIN + 7) / 8 +STREAM_INFO_SIZE_MAX = (STREAM_INFO_BITS_MAX + 7) / 8 + + +class _LSBBitReader(BitReader): + """BitReader implementation which reads bits starting at LSB in each byte. + """ + + def _lsb(self, count): + value = self._buffer & 0xff >> (8 - count) + self._buffer = self._buffer >> count + self._bits -= count + return value + + def bits(self, count): + """Reads `count` bits and returns an uint, LSB read first. + + May raise BitReaderError if not enough data could be read or + IOError by the underlying file object. + """ + if count < 0: + raise ValueError + + value = 0 + if count <= self._bits: + value = self._lsb(count) + else: + # First read all available bits + shift = 0 + remaining = count + if self._bits > 0: + remaining -= self._bits + shift = self._bits + value = self._lsb(self._bits) + assert self._bits == 0 + + # Now add additional bytes + n_bytes = (remaining - self._bits + 7) // 8 + data = self._fileobj.read(n_bytes) + if len(data) != n_bytes: + raise BitReaderError("not enough data") + for b in bytearray(data): + if remaining > 8: # Use full byte + remaining -= 8 + value = (b << shift) | value + shift += 8 + else: + self._buffer = b + self._bits = 8 + b = self._lsb(remaining) + value = (b << shift) | value + + assert 0 <= self._bits < 8 + return value + + +class TAKHeaderError(error): + pass + + +class TAKInfo(StreamInfo): + + """TAK stream information. 
+ + Attributes: + channels (`int`): number of audio channels + length (`float`): file length in seconds, as a float + sample_rate (`int`): audio sampling rate in Hz + bits_per_sample (`int`): audio sample size + encoder_info (`mutagen.text`): encoder version + """ + + channels = 0 + length = 0 + sample_rate = 0 + bitrate = 0 + encoder_info = "" + + @convert_error(IOError, TAKHeaderError) + @convert_error(BitReaderError, TAKHeaderError) + def __init__(self, fileobj): + stream_id = fileobj.read(4) + if len(stream_id) != 4 or not stream_id == b"tBaK": + raise TAKHeaderError("not a TAK file") + + bitreader = _LSBBitReader(fileobj) + while True: + type = TAKMetadata(bitreader.bits(7)) + bitreader.skip(1) # Unused + size = struct.unpack(" 0: + self.length = self.number_of_samples / float(self.sample_rate) + + def _parse_stream_info(self, bitreader, size): + if size < STREAM_INFO_SIZE_MIN or size > STREAM_INFO_SIZE_MAX: + raise TAKHeaderError("stream info has invalid length") + + # Encoder Info + bitreader.skip(ENCODER_INFO_CODEC_BITS) + bitreader.skip(ENCODER_INFO_PROFILE_BITS) + + # Size Info + bitreader.skip(SIZE_INFO_FRAME_DURATION_BITS) + self.number_of_samples = bitreader.bits(SIZE_INFO_SAMPLE_NUM_BITS) + + # Audio Format + bitreader.skip(AUDIO_FORMAT_DATA_TYPE_BITS) + self.sample_rate = (bitreader.bits(AUDIO_FORMAT_SAMPLE_RATE_BITS) + + SAMPLE_RATE_MIN) + self.bits_per_sample = (bitreader.bits(AUDIO_FORMAT_SAMPLE_BITS_BITS) + + SAMPLE_BITS_MIN) + self.channels = (bitreader.bits(AUDIO_FORMAT_CHANNEL_NUM_BITS) + + CHANNEL_NUM_MIN) + bitreader.skip(AUDIO_FORMAT_HAS_EXTENSION_BITS) + + def _parse_encoder_info(self, bitreader, size): + patch = bitreader.bits(8) + minor = bitreader.bits(8) + major = bitreader.bits(8) + self.encoder_info = "TAK %d.%d.%d" % (major, minor, patch) + + def pprint(self): + return u"%s, %d Hz, %d bits, %.2f seconds, %d channel(s)" % ( + self.encoder_info or "TAK", self.sample_rate, self.bits_per_sample, + self.length, self.channels) + + +class 
TAK(APEv2File): + """TAK(filething) + + Arguments: + filething (filething) + + Attributes: + info (`TAKInfo`) + """ + + _Info = TAKInfo + _mimes = ["audio/x-tak"] + + @staticmethod + def score(filename, fileobj, header): + return header.startswith(b"tBaK") + endswith(filename.lower(), ".tak") + + +Open = TAK diff --git a/lib/mutagen/trueaudio.py b/lib/mutagen/trueaudio.py old mode 100755 new mode 100644 index fd5ca050..3251ce47 --- a/lib/mutagen/trueaudio.py +++ b/lib/mutagen/trueaudio.py @@ -10,17 +10,16 @@ True Audio is a lossless format designed for real-time encoding and decoding. This module is based on the documentation at -http://www.true-audio.com/TTA_Lossless_Audio_Codec\_-_Format_Description +http://www.true-audio.com/TTA_Lossless_Audio_Codec\\_-_Format_Description True Audio files use ID3 tags. """ __all__ = ["TrueAudio", "Open", "delete", "EasyTrueAudio"] -from ._compat import endswith from mutagen import StreamInfo from mutagen.id3 import ID3FileType, delete -from mutagen._util import cdata, MutagenError, convert_error +from mutagen._util import cdata, MutagenError, convert_error, endswith class error(MutagenError): @@ -54,7 +53,7 @@ class TrueAudioInfo(StreamInfo): self.length = float(samples) / self.sample_rate def pprint(self): - return "True Audio, %.2f seconds, %d Hz." % ( + return u"True Audio, %.2f seconds, %d Hz." % ( self.length, self.sample_rate) diff --git a/lib/mutagen/wave.py b/lib/mutagen/wave.py new file mode 100644 index 00000000..5acf01cc --- /dev/null +++ b/lib/mutagen/wave.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2017 Borewit +# Copyright (C) 2019-2020 Philipp Wolfer +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +"""Microsoft WAVE/RIFF audio file/stream information and tags.""" + +import sys +import struct + +from mutagen import StreamInfo, FileType + +from mutagen.id3 import ID3 +from mutagen._riff import RiffFile, InvalidChunk +from mutagen._iff import error as IffError +from mutagen.id3._util import ID3NoHeaderError, error as ID3Error +from mutagen._util import ( + convert_error, + endswith, + loadfile, + reraise, +) + +__all__ = ["WAVE", "Open", "delete"] + + +class error(IffError): + """WAVE stream parsing errors.""" + + +class _WaveFile(RiffFile): + """Representation of a RIFF/WAVE file""" + + def __init__(self, fileobj): + RiffFile.__init__(self, fileobj) + + if self.file_type != u'WAVE': + raise error("Expected RIFF/WAVE.") + + # Normalize ID3v2-tag-chunk to lowercase + if u'ID3' in self: + self[u'ID3'].id = u'id3' + + +class WaveStreamInfo(StreamInfo): + """WaveStreamInfo() + + Microsoft WAVE file information. + + Information is parsed from the 'fmt' & 'data'chunk of the RIFF/WAVE file + + Attributes: + length (`float`): audio length, in seconds + bitrate (`int`): audio bitrate, in bits per second + channels (`int`): The number of audio channels + sample_rate (`int`): audio sample rate, in Hz + bits_per_sample (`int`): The audio sample size + """ + + length = 0.0 + bitrate = 0 + channels = 0 + sample_rate = 0 + bits_per_sample = 0 + + SIZE = 16 + + @convert_error(IOError, error) + def __init__(self, fileobj): + """Raises error""" + + wave_file = _WaveFile(fileobj) + try: + format_chunk = wave_file[u'fmt'] + except KeyError as e: + raise error(str(e)) + + data = format_chunk.read() + if len(data) < 16: + raise InvalidChunk() + + # RIFF: http://soundfile.sapp.org/doc/WaveFormat/ + # Python struct.unpack: + # https://docs.python.org/2/library/struct.html#byte-order-size-and-alignment + info = struct.unpack(' 0: + try: + data_chunk = wave_file[u'data'] + self._number_of_samples = data_chunk.data_size / block_align + except KeyError: + pass + + if self.sample_rate > 
0: + self.length = self._number_of_samples / self.sample_rate + + def pprint(self): + return u"%d channel RIFF @ %d bps, %s Hz, %.2f seconds" % ( + self.channels, self.bitrate, self.sample_rate, self.length) + + +class _WaveID3(ID3): + """A Wave file with ID3v2 tags""" + + def _pre_load_header(self, fileobj): + try: + fileobj.seek(_WaveFile(fileobj)[u'id3'].data_offset) + except (InvalidChunk, KeyError): + raise ID3NoHeaderError("No ID3 chunk") + + @convert_error(IOError, error) + @loadfile(writable=True) + def save(self, filething, v1=1, v2_version=4, v23_sep='/', padding=None): + """Save ID3v2 data to the Wave/RIFF file""" + + fileobj = filething.fileobj + wave_file = _WaveFile(fileobj) + + if u'id3' not in wave_file: + wave_file.insert_chunk(u'id3') + + chunk = wave_file[u'id3'] + + try: + data = self._prepare_data( + fileobj, chunk.data_offset, chunk.data_size, v2_version, + v23_sep, padding) + except ID3Error as e: + reraise(error, e, sys.exc_info()[2]) + + chunk.resize(len(data)) + chunk.write(data) + + def delete(self, filething): + """Completely removes the ID3 chunk from the RIFF/WAVE file""" + + delete(filething) + self.clear() + + +@convert_error(IOError, error) +@loadfile(method=False, writable=True) +def delete(filething): + """Completely removes the ID3 chunk from the RIFF/WAVE file""" + + try: + _WaveFile(filething.fileobj).delete_chunk(u'id3') + except KeyError: + pass + + +class WAVE(FileType): + """WAVE(filething) + + A Waveform Audio File Format + (WAVE, or more commonly known as WAV due to its filename extension) + + Arguments: + filething (filething) + + Attributes: + tags (`mutagen.id3.ID3`) + info (`WaveStreamInfo`) + """ + + _mimes = ["audio/wav", "audio/wave"] + + @staticmethod + def score(filename, fileobj, header): + filename = filename.lower() + + return (header.startswith(b"RIFF") + (header[8:12] == b'WAVE') + + endswith(filename, b".wav") + endswith(filename, b".wave")) + + def add_tags(self): + """Add an empty ID3 tag to the file.""" 
+ if self.tags is None: + self.tags = _WaveID3() + else: + raise error("an ID3 tag already exists") + + @convert_error(IOError, error) + @loadfile() + def load(self, filething, **kwargs): + """Load stream and tag information from a file.""" + + fileobj = filething.fileobj + self.info = WaveStreamInfo(fileobj) + fileobj.seek(0, 0) + + try: + self.tags = _WaveID3(fileobj, **kwargs) + except ID3NoHeaderError: + self.tags = None + except ID3Error as e: + raise error(e) + else: + self.tags.filename = self.filename + + +Open = WAVE diff --git a/lib/mutagen/wavpack.py b/lib/mutagen/wavpack.py old mode 100755 new mode 100644 index ca471b1e..906d6bef --- a/lib/mutagen/wavpack.py +++ b/lib/mutagen/wavpack.py @@ -76,9 +76,10 @@ class WavPackInfo(StreamInfo): Attributes: channels (int): number of audio channels (1 or 2) - length (float: file length in seconds, as a float + length (float): file length in seconds, as a float sample_rate (int): audio sampling rate in Hz - version (int) WavPack stream version + bits_per_sample (int): audio sample size + version (int): WavPack stream version """ def __init__(self, fileobj): @@ -90,6 +91,12 @@ class WavPackInfo(StreamInfo): self.version = header.version self.channels = bool(header.flags & 4) or 2 self.sample_rate = RATES[(header.flags >> 23) & 0xF] + self.bits_per_sample = ((header.flags & 3) + 1) * 8 + + # most common multiplier (DSD64) + if (header.flags >> 31) & 1: + self.sample_rate *= 4 + self.bits_per_sample = 1 if header.total_samples == -1 or header.block_index != 0: # TODO: we could make this faster by using the tag size @@ -109,11 +116,20 @@ class WavPackInfo(StreamInfo): self.length = float(samples) / self.sample_rate def pprint(self): - return "WavPack, %.2f seconds, %d Hz" % (self.length, + return u"WavPack, %.2f seconds, %d Hz" % (self.length, self.sample_rate) class WavPack(APEv2File): + """WavPack(filething) + + Arguments: + filething (filething) + + Attributes: + info (`WavPackInfo`) + """ + _Info = WavPackInfo 
_mimes = ["audio/x-wavpack"]