# MP3 stream header information support for Mutagen.
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.

"""MPEG audio stream information and tags."""

import os
import struct

from lib.mutagen.id3 import ID3FileType, BitPaddedInt, delete

__all__ = ["MP3", "Open", "delete"]


class error(RuntimeError):
    """Base class for the errors raised by this module."""


class HeaderNotFoundError(error, IOError):
    """No plausible MPEG frame header could be located."""


class InvalidMPEGHeader(error, IOError):
    """An MPEG frame header was located but is not valid."""


# Mode values.
STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4)


class MPEGInfo(object):
    """MPEG audio stream information

    Parse information about an MPEG audio file. This also reads the
    Xing VBR header format.

    This code was implemented based on the format documentation at
    http://www.dv.co.yu/mpgscript/mpeghdr.htm.

    Useful attributes:
    length -- audio length, in seconds
    bitrate -- audio bitrate, in bits per second
    sketchy -- if true, the file may not be valid MPEG audio

    Useless attributes:
    version -- MPEG version (1, 2, 2.5)
    layer -- 1, 2, or 3
    mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3)
    protected -- whether or not the file is "protected"
    padding -- whether or not audio frames are padded
    sample_rate -- audio sample rate, in Hz
    """

    # Map (version, layer) tuples to bitrates (in kbps, indexed by the
    # 4-bit bitrate field of the frame header).
    __BITRATE = {
        (1, 1): range(0, 480, 32),
        (1, 2): [0, 32, 48, 56, 64, 80, 96, 112, 128,
                 160, 192, 224, 256, 320, 384],
        (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112,
                 128, 160, 192, 224, 256, 320],
        (2, 1): [0, 32, 48, 56, 64, 80, 96, 112, 128,
                 144, 160, 176, 192, 224, 256],
        (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64,
                 80, 96, 112, 128, 144, 160],
    }

    __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
    for i in range(1, 4):
        __BITRATE[(2.5, i)] = __BITRATE[(2, i)]

    # Map version to sample rates (indexed by the 2-bit sample rate field).
    __RATES = {
        1: [44100, 48000, 32000],
        2: [22050, 24000, 16000],
        2.5: [11025, 12000, 8000],
    }

    sketchy = False

    def __init__(self, fileobj, offset=None):
        """Parse MPEG stream information from a file-like object.

        If an offset argument is given, it is used to start looking
        for stream information and Xing headers; otherwise, ID3v2 tags
        will be skipped automatically. A correct offset can make
        loading files significantly faster.

        Raises HeaderNotFoundError if not even a single plausible MPEG
        frame can be found at the starting offset.
        """

        try:
            size = os.path.getsize(fileobj.name)
        except (IOError, OSError, AttributeError):
            # No usable file name; measure the size by seeking to the end.
            fileobj.seek(0, 2)
            size = fileobj.tell()

        # If we don't get an offset, try to skip an ID3v2 tag.
        if offset is None:
            fileobj.seek(0, 0)
            idata = fileobj.read(10)
            try:
                id3, insize = struct.unpack('>3sxxx4s', idata)
            except struct.error:
                # Fewer than 10 bytes available: there is no tag to skip.
                id3, insize = '', 0
            insize = BitPaddedInt(insize)
            if id3 == 'ID3' and insize > 0:
                # The size field does not count the 10-byte tag header
                # that was just read, so add it back.
                offset = insize + 10
            else:
                offset = 0

        # Try to find two valid headers (meaning, very likely MPEG data)
        # at the given offset, 30% through the file, 60% through the file,
        # and 90% through the file.
        for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]:
            try:
                self.__try(fileobj, int(i), size - offset)
            except error:
                # Deliberate: no sync at this position, try the next one.
                pass
            else:
                break
        # If we can't find any two consecutive frames, try to find just
        # one frame back at the original offset given.
        else:
            # Flag the result as sketchy *before* parsing so that a
            # Xing/VBRI header found by __try can still clear the flag.
            self.sketchy = True
            self.__try(fileobj, offset, size - offset, False)

    def __try(self, fileobj, offset, real_size, check_second=True):
        """Look for an MPEG frame near offset and fill in the attributes.

        fileobj -- file-like object to read from
        offset -- absolute position at which to start scanning
        real_size -- number of bytes assumed to hold audio data; used
            for the length estimate
        check_second -- also require a sync word where the following
            frame is expected to begin

        Raises HeaderNotFoundError if no acceptable frame header (or,
        with check_second, no second sync word) is found in the 32k
        read from offset.
        """
        # This is going to be one really long function; bear with it,
        # because there's not really a sane point to cut it up.
        fileobj.seek(offset, 0)

        # We "know" we have an MPEG file if we find two frames that look like
        # valid MPEG data. If we can't find them in 32k of reads, something
        # is horribly wrong (the longest frame can only be about 4k). This
        # is assuming the offset didn't lie.
        data = fileobj.read(32768)

        frame_1 = data.find("\xff")
        while 0 <= frame_1 <= len(data) - 4:
            frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0]
            if (frame_data >> 16) & 0xE0 != 0xE0:
                # The three sync bits following the 0xFF byte are not all
                # set, so this is not a frame header.
                frame_1 = data.find("\xff", frame_1 + 2)
            else:
                # The remaining header bits (private, mode extension,
                # copyright, original, emphasis) are not used here.
                version = (frame_data >> 19) & 0x3
                layer = (frame_data >> 17) & 0x3
                protection = (frame_data >> 16) & 0x1
                bitrate = (frame_data >> 12) & 0xF
                sample_rate = (frame_data >> 10) & 0x3
                padding = (frame_data >> 9) & 0x1
                self.mode = (frame_data >> 6) & 0x3
                if (version == 1 or layer == 0 or sample_rate == 0x3 or
                        bitrate == 0 or bitrate == 0xF):
                    # One of the fields holds a value this parser rejects;
                    # keep scanning.
                    frame_1 = data.find("\xff", frame_1 + 2)
                else:
                    break
        else:
            raise HeaderNotFoundError("can't sync to an MPEG frame")

        # There is a serious problem here, which is that many flags
        # in an MPEG header are backwards.
        self.version = [2.5, None, 2, 1][version]
        self.layer = 4 - layer
        self.protected = not protection
        self.padding = bool(padding)

        self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
        self.bitrate *= 1000
        self.sample_rate = self.__RATES[self.version][sample_rate]

        # frame_length is in bytes, frame_size in samples. The divisions
        # are integer divisions on purpose.
        if self.layer == 1:
            frame_length = (
                12 * self.bitrate // self.sample_rate + padding) * 4
            frame_size = 384
        elif self.version >= 2 and self.layer == 3:
            frame_length = 72 * self.bitrate // self.sample_rate + padding
            frame_size = 576
        else:
            frame_length = 144 * self.bitrate // self.sample_rate + padding
            frame_size = 1152

        if check_second:
            possible = frame_1 + frame_length
            # Both sync bytes of the next frame must lie inside the
            # buffer; otherwise struct.unpack would fail on a short slice.
            if possible + 2 > len(data):
                raise HeaderNotFoundError("can't sync to second MPEG frame")
            frame_data = struct.unpack(">H", data[possible:possible + 2])[0]
            if frame_data & 0xFFE0 != 0xFFE0:
                raise HeaderNotFoundError("can't sync to second MPEG frame")

        # First estimate: assume every frame is as long as this one.
        frame_count = real_size / float(frame_length)
        samples = frame_size * frame_count
        self.length = samples / self.sample_rate

        # Try to find/parse the Xing header, which trumps the above length
        # and bitrate calculation.
        fileobj.seek(offset, 0)
        data = fileobj.read(32768)
        try:
            xing = data[:-4].index("Xing")
        except ValueError:
            # Try to find/parse the VBRI header, which trumps the above length
            # calculation.
            try:
                vbri = data[:-24].index("VBRI")
            except ValueError:
                pass
            else:
                # If a VBRI header was found, this is definitely MPEG audio.
                self.sketchy = False
                vbri_version = struct.unpack('>H', data[vbri + 4:vbri + 6])[0]
                if vbri_version == 1:
                    frame_count = struct.unpack(
                        '>I', data[vbri + 14:vbri + 18])[0]
                    samples = float(frame_size * frame_count)
                    self.length = (samples / self.sample_rate) or self.length
        else:
            # If a Xing header was found, this is definitely MPEG audio.
            self.sketchy = False
            flags = struct.unpack('>I', data[xing + 4:xing + 8])[0]
            # The optional fields follow the flags back to back, each one
            # present only if its flag bit is set, so track the position
            # instead of assuming fixed offsets.
            pos = xing + 8
            if flags & 0x1:
                if pos + 4 <= len(data):
                    frame_count = struct.unpack('>I', data[pos:pos + 4])[0]
                    samples = float(frame_size * frame_count)
                    self.length = (samples / self.sample_rate) or self.length
                pos += 4
            # Skip the bitrate update if the field is truncated or the
            # length is unusable as a divisor.
            if flags & 0x2 and pos + 4 <= len(data) and self.length > 0:
                byte_count = struct.unpack('>I', data[pos:pos + 4])[0]
                self.bitrate = int((byte_count * 8) // self.length)

        # NOTE: a final "bitrate * length vs. file size" sanity check used
        # to live here, but it measured the size with seek(2, 0), which
        # always yields 2, so it could never fire. It was dropped instead of
        # enabled because its recalculation would alter results for files
        # that are handled correctly today.

    def pprint(self):
        """Return a one-line, human-readable summary of the stream."""
        s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % (
            self.version, self.layer, self.bitrate, self.sample_rate,
            self.length)
        if self.sketchy:
            s += " (sketchy)"
        return s


class MP3(ID3FileType):
    """An MPEG audio (usually MPEG-1 Layer 3) file."""

    _Info = MPEGInfo
    _mimes = ["audio/mp3", "audio/x-mp3", "audio/mpeg", "audio/mpg",
              "audio/x-mpeg"]

    def score(filename, fileobj, header):
        """Rate how likely the file is MPEG audio.

        An "ID3" prefix on header is worth 2 points and a matching
        file extension (.mp3, .mp2, .mpg, .mpeg) 1 point.
        """
        filename = filename.lower()
        return (header.startswith("ID3") * 2 + filename.endswith(".mp3") +
                filename.endswith(".mp2") + filename.endswith(".mpg") +
                filename.endswith(".mpeg"))
    score = staticmethod(score)


Open = MP3


class EasyMP3(MP3):
    """Like MP3, but uses EasyID3 for tags."""

    from lib.mutagen.easyid3 import EasyID3 as ID3