# FLAC comment support for Mutagen # Copyright 2005 Joe Wreschnig # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. """Read and write FLAC Vorbis comments and stream information. Read more about FLAC at http://flac.sourceforge.net. FLAC supports arbitrary metadata blocks. The two most interesting ones are the FLAC stream information block, and the Vorbis comment block; these are also the only ones Mutagen can currently read. This module does not handle Ogg FLAC files. Based off documentation available at http://flac.sourceforge.net/format.html """ __all__ = ["FLAC", "Open", "delete"] import struct from cStringIO import StringIO from _vorbis import VCommentDict from lib.mutagen import FileType from lib.mutagen._util import insert_bytes from lib.mutagen.id3 import BitPaddedInt class error(IOError): pass class FLACNoHeaderError(error): pass class FLACVorbisError(ValueError, error): pass def to_int_be(string): """Convert an arbitrarily-long string to a long using big-endian byte order.""" return reduce(lambda a, b: (a << 8) + ord(b), string, 0L) class MetadataBlock(object): """A generic block of FLAC metadata. This class is extended by specific used as an ancestor for more specific blocks, and also as a container for data blobs of unknown blocks. Attributes: data -- raw binary data for this block """ def __init__(self, data): """Parse the given data string or file-like as a metadata block. The metadata header should not be included.""" if data is not None: if isinstance(data, str): data = StringIO(data) elif not hasattr(data, 'read'): raise TypeError( "StreamInfo requires string data or a file-like") self.load(data) def load(self, data): self.data = data.read() def write(self): return self.data def writeblocks(blocks): """Render metadata block as a byte string.""" data = [] codes = [[block.code, block.write()] for block in blocks] codes[-1][0] |= 128 for code, datum in codes: byte = chr(code) if len(datum) > 2**24: raise error("block is too long to write") length = struct.pack(">I", len(datum))[-3:] data.append(byte + length + datum) return "".join(data) writeblocks = staticmethod(writeblocks) def group_padding(blocks): """Consolidate FLAC padding metadata blocks. The overall size of the rendered blocks does not change, so this adds several bytes of padding for each merged block.""" paddings = filter(lambda x: isinstance(x, Padding), blocks) map(blocks.remove, paddings) padding = Padding() # total padding size is the sum of padding sizes plus 4 bytes # per removed header. size = sum([padding.length for padding in paddings]) padding.length = size + 4 * (len(paddings) - 1) blocks.append(padding) group_padding = staticmethod(group_padding) class StreamInfo(MetadataBlock): """FLAC stream information. This contains information about the audio data in the FLAC file. Unlike most stream information objects in Mutagen, changes to this one will rewritten to the file when it is saved. Unless you are actually changing the audio stream itself, don't change any attributes of this block. Attributes: min_blocksize -- minimum audio block size max_blocksize -- maximum audio block size sample_rate -- audio sample rate in Hz channels -- audio channels (1 for mono, 2 for stereo) bits_per_sample -- bits per sample total_samples -- total samples in file length -- audio length in seconds """ code = 0 def __eq__(self, other): try: return (self.min_blocksize == other.min_blocksize and self.max_blocksize == other.max_blocksize and self.sample_rate == other.sample_rate and self.channels == other.channels and self.bits_per_sample == other.bits_per_sample and self.total_samples == other.total_samples) except: return False def load(self, data): self.min_blocksize = int(to_int_be(data.read(2))) self.max_blocksize = int(to_int_be(data.read(2))) self.min_framesize = int(to_int_be(data.read(3))) self.max_framesize = int(to_int_be(data.read(3))) # first 16 bits of sample rate sample_first = to_int_be(data.read(2)) # last 4 bits of sample rate, 3 of channels, first 1 of bits/sample sample_channels_bps = to_int_be(data.read(1)) # last 4 of bits/sample, 36 of total samples bps_total = to_int_be(data.read(5)) sample_tail = sample_channels_bps >> 4 self.sample_rate = int((sample_first << 4) + sample_tail) self.channels = int(((sample_channels_bps >> 1) & 7) + 1) bps_tail = bps_total >> 36 bps_head = (sample_channels_bps & 1) << 4 self.bits_per_sample = int(bps_head + bps_tail + 1) self.total_samples = bps_total & 0xFFFFFFFFFL self.length = self.total_samples / float(self.sample_rate) self.md5_signature = to_int_be(data.read(16)) def write(self): f = StringIO() f.write(struct.pack(">I", self.min_blocksize)[-2:]) f.write(struct.pack(">I", self.max_blocksize)[-2:]) f.write(struct.pack(">I", self.min_framesize)[-3:]) f.write(struct.pack(">I", self.max_framesize)[-3:]) # first 16 bits of sample rate f.write(struct.pack(">I", self.sample_rate >> 4)[-2:]) # 4 bits sample, 3 channel, 1 bps byte = (self.sample_rate & 0xF) << 4 byte += ((self.channels - 1) & 7) << 1 byte += ((self.bits_per_sample - 1) >> 4) & 1 f.write(chr(byte)) # 4 bits of bps, 4 of sample count byte = ((self.bits_per_sample - 1) & 0xF) << 4 byte += (self.total_samples >> 32) & 0xF f.write(chr(byte)) # last 32 of sample count f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFFL)) # MD5 signature sig = self.md5_signature f.write(struct.pack( ">4I", (sig >> 96) & 0xFFFFFFFFL, (sig >> 64) & 0xFFFFFFFFL, (sig >> 32) & 0xFFFFFFFFL, sig & 0xFFFFFFFFL)) return f.getvalue() def pprint(self): return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) class SeekPoint(tuple): """A single seek point in a FLAC file. Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL, and byte_offset and num_samples undefined. Seek points must be sorted in ascending order by first_sample number. Seek points must be unique by first_sample number, except for placeholder points. Placeholder points must occur last in the table and there may be any number of them. Attributes: first_sample -- sample number of first sample in the target frame byte_offset -- offset from first frame to target frame num_samples -- number of samples in target frame """ def __new__(cls, first_sample, byte_offset, num_samples): return super(cls, SeekPoint).__new__(cls, (first_sample, byte_offset, num_samples)) first_sample = property(lambda self: self[0]) byte_offset = property(lambda self: self[1]) num_samples = property(lambda self: self[2]) class SeekTable(MetadataBlock): """Read and write FLAC seek tables. Attributes: seekpoints -- list of SeekPoint objects """ __SEEKPOINT_FORMAT = '>QQH' __SEEKPOINT_SIZE = struct.calcsize(__SEEKPOINT_FORMAT) code = 3 def __init__(self, data): self.seekpoints = [] super(SeekTable, self).__init__(data) def __eq__(self, other): try: return (self.seekpoints == other.seekpoints) except (AttributeError, TypeError): return False def load(self, data): self.seekpoints = [] sp = data.read(self.__SEEKPOINT_SIZE) while len(sp) == self.__SEEKPOINT_SIZE: self.seekpoints.append(SeekPoint( *struct.unpack(self.__SEEKPOINT_FORMAT, sp))) sp = data.read(self.__SEEKPOINT_SIZE) def write(self): f = StringIO() for seekpoint in self.seekpoints: packed = struct.pack(self.__SEEKPOINT_FORMAT, seekpoint.first_sample, seekpoint.byte_offset, seekpoint.num_samples) f.write(packed) return f.getvalue() def __repr__(self): return "<%s seekpoints=%r>" % (type(self).__name__, self.seekpoints) class VCFLACDict(VCommentDict): """Read and write FLAC Vorbis comments. FLACs don't use the framing bit at the end of the comment block. So this extends VCommentDict to not use the framing bit. """ code = 4 def load(self, data, errors='replace', framing=False): super(VCFLACDict, self).load(data, errors=errors, framing=framing) def write(self, framing=False): return super(VCFLACDict, self).write(framing=framing) class CueSheetTrackIndex(tuple): """Index for a track in a cuesheet. For CD-DA, an index_number of 0 corresponds to the track pre-gap. The first index in a track must have a number of 0 or 1, and subsequently, index_numbers must increase by 1. Index_numbers must be unique within a track. And index_offset must be evenly divisible by 588 samples. Attributes: index_number -- index point number index_offset -- offset in samples from track start """ def __new__(cls, index_number, index_offset): return super(cls, CueSheetTrackIndex).__new__(cls, (index_number, index_offset)) index_number = property(lambda self: self[0]) index_offset = property(lambda self: self[1]) class CueSheetTrack(object): """A track in a cuesheet. For CD-DA, track_numbers must be 1-99, or 170 for the lead-out. Track_numbers must be unique within a cue sheet. There must be atleast one index in every track except the lead-out track which must have none. Attributes: track_number -- track number start_offset -- track offset in samples from start of FLAC stream isrc -- ISRC code type -- 0 for audio, 1 for digital data pre_emphasis -- true if the track is recorded with pre-emphasis indexes -- list of CueSheetTrackIndex objects """ def __init__(self, track_number, start_offset, isrc='', type_=0, pre_emphasis=False): self.track_number = track_number self.start_offset = start_offset self.isrc = isrc self.type = type_ self.pre_emphasis = pre_emphasis self.indexes = [] def __eq__(self, other): try: return (self.track_number == other.track_number and self.start_offset == other.start_offset and self.isrc == other.isrc and self.type == other.type and self.pre_emphasis == other.pre_emphasis and self.indexes == other.indexes) except (AttributeError, TypeError): return False def __repr__(self): return ("<%s number=%r, offset=%d, isrc=%r, type=%r, " "pre_emphasis=%r, indexes=%r)>") % ( type(self).__name__, self.track_number, self.start_offset, self.isrc, self.type, self.pre_emphasis, self.indexes) class CueSheet(MetadataBlock): """Read and write FLAC embedded cue sheets. Number of tracks should be from 1 to 100. There should always be exactly one lead-out track and that track must be the last track in the cue sheet. Attributes: media_catalog_number -- media catalog number in ASCII lead_in_samples -- number of lead-in samples compact_disc -- true if the cuesheet corresponds to a compact disc tracks -- list of CueSheetTrack objects lead_out -- lead-out as CueSheetTrack or None if lead-out was not found """ __CUESHEET_FORMAT = '>128sQB258xB' __CUESHEET_SIZE = struct.calcsize(__CUESHEET_FORMAT) __CUESHEET_TRACK_FORMAT = '>QB12sB13xB' __CUESHEET_TRACK_SIZE = struct.calcsize(__CUESHEET_TRACK_FORMAT) __CUESHEET_TRACKINDEX_FORMAT = '>QB3x' __CUESHEET_TRACKINDEX_SIZE = struct.calcsize(__CUESHEET_TRACKINDEX_FORMAT) code = 5 media_catalog_number = '' lead_in_samples = 88200 compact_disc = True def __init__(self, data): self.tracks = [] super(CueSheet, self).__init__(data) def __eq__(self, other): try: return (self.media_catalog_number == other.media_catalog_number and self.lead_in_samples == other.lead_in_samples and self.compact_disc == other.compact_disc and self.tracks == other.tracks) except (AttributeError, TypeError): return False def load(self, data): header = data.read(self.__CUESHEET_SIZE) media_catalog_number, lead_in_samples, flags, num_tracks = \ struct.unpack(self.__CUESHEET_FORMAT, header) self.media_catalog_number = media_catalog_number.rstrip('\0') self.lead_in_samples = lead_in_samples self.compact_disc = bool(flags & 0x80) self.tracks = [] for i in range(num_tracks): track = data.read(self.__CUESHEET_TRACK_SIZE) start_offset, track_number, isrc_padded, flags, num_indexes = \ struct.unpack(self.__CUESHEET_TRACK_FORMAT, track) isrc = isrc_padded.rstrip('\0') type_ = (flags & 0x80) >> 7 pre_emphasis = bool(flags & 0x40) val = CueSheetTrack( track_number, start_offset, isrc, type_, pre_emphasis) for j in range(num_indexes): index = data.read(self.__CUESHEET_TRACKINDEX_SIZE) index_offset, index_number = struct.unpack( self.__CUESHEET_TRACKINDEX_FORMAT, index) val.indexes.append( CueSheetTrackIndex(index_number, index_offset)) self.tracks.append(val) def write(self): f = StringIO() flags = 0 if self.compact_disc: flags |= 0x80 packed = struct.pack( self.__CUESHEET_FORMAT, self.media_catalog_number, self.lead_in_samples, flags, len(self.tracks)) f.write(packed) for track in self.tracks: track_flags = 0 track_flags |= (track.type & 1) << 7 if track.pre_emphasis: track_flags |= 0x40 track_packed = struct.pack( self.__CUESHEET_TRACK_FORMAT, track.start_offset, track.track_number, track.isrc, track_flags, len(track.indexes)) f.write(track_packed) for index in track.indexes: index_packed = struct.pack( self.__CUESHEET_TRACKINDEX_FORMAT, index.index_offset, index.index_number) f.write(index_packed) return f.getvalue() def __repr__(self): return ("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, " "tracks=%r>") % ( type(self).__name__, self.media_catalog_number, self.lead_in_samples, self.compact_disc, self.tracks) class Picture(MetadataBlock): """Read and write FLAC embed pictures. Attributes: type -- picture type (same as types for ID3 APIC frames) mime -- MIME type of the picture desc -- picture's description width -- width in pixels height -- height in pixels depth -- color depth in bits-per-pixel colors -- number of colors for indexed palettes (like GIF), 0 for non-indexed data -- picture data """ code = 6 def __init__(self, data=None): self.type = 0 self.mime = u'' self.desc = u'' self.width = 0 self.height = 0 self.depth = 0 self.colors = 0 self.data = '' super(Picture, self).__init__(data) def __eq__(self, other): try: return (self.type == other.type and self.mime == other.mime and self.desc == other.desc and self.width == other.width and self.height == other.height and self.depth == other.depth and self.colors == other.colors and self.data == other.data) except (AttributeError, TypeError): return False def load(self, data): self.type, length = struct.unpack('>2I', data.read(8)) self.mime = data.read(length).decode('UTF-8', 'replace') length, = struct.unpack('>I', data.read(4)) self.desc = data.read(length).decode('UTF-8', 'replace') (self.width, self.height, self.depth, self.colors, length) = struct.unpack('>5I', data.read(20)) self.data = data.read(length) def write(self): f = StringIO() mime = self.mime.encode('UTF-8') f.write(struct.pack('>2I', self.type, len(mime))) f.write(mime) desc = self.desc.encode('UTF-8') f.write(struct.pack('>I', len(desc))) f.write(desc) f.write(struct.pack('>5I', self.width, self.height, self.depth, self.colors, len(self.data))) f.write(self.data) return f.getvalue() def __repr__(self): return "<%s '%s' (%d bytes)>" % (type(self).__name__, self.mime, len(self.data)) class Padding(MetadataBlock): """Empty padding space for metadata blocks. To avoid rewriting the entire FLAC file when editing comments, metadata is often padded. Padding should occur at the end, and no more than one padding block should be in any FLAC file. Mutagen handles this with MetadataBlock.group_padding. """ code = 1 def __init__(self, data=""): super(Padding, self).__init__(data) def load(self, data): self.length = len(data.read()) def write(self): try: return "\x00" * self.length # On some 64 bit platforms this won't generate a MemoryError # or OverflowError since you might have enough RAM, but it # still generates a ValueError. On other 64 bit platforms, # this will still succeed for extremely large values. # Those should never happen in the real world, and if they # do, writeblocks will catch it. except (OverflowError, ValueError, MemoryError): raise error("cannot write %d bytes" % self.length) def __eq__(self, other): return isinstance(other, Padding) and self.length == other.length def __repr__(self): return "<%s (%d bytes)>" % (type(self).__name__, self.length) class FLAC(FileType): """A FLAC audio file. Attributes: info -- stream information (length, bitrate, sample rate) tags -- metadata tags, if any cuesheet -- CueSheet object, if any seektable -- SeekTable object, if any pictures -- list of embedded pictures """ _mimes = ["audio/x-flac", "application/x-flac"] METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict, CueSheet, Picture] """Known metadata block types, indexed by ID.""" def score(filename, fileobj, header): return (header.startswith("fLaC") + filename.lower().endswith(".flac") * 3) score = staticmethod(score) def __read_metadata_block(self, file): byte = ord(file.read(1)) size = to_int_be(file.read(3)) try: data = file.read(size) if len(data) != size: raise error( "file said %d bytes, read %d bytes" % (size, len(data))) block = self.METADATA_BLOCKS[byte & 0x7F](data) except (IndexError, TypeError): block = MetadataBlock(data) block.code = byte & 0x7F self.metadata_blocks.append(block) else: self.metadata_blocks.append(block) if block.code == VCFLACDict.code: if self.tags is None: self.tags = block else: raise FLACVorbisError("> 1 Vorbis comment block found") elif block.code == CueSheet.code: if self.cuesheet is None: self.cuesheet = block else: raise error("> 1 CueSheet block found") elif block.code == SeekTable.code: if self.seektable is None: self.seektable = block else: raise error("> 1 SeekTable block found") return (byte >> 7) ^ 1 def add_tags(self): """Add a Vorbis comment block to the file.""" if self.tags is None: self.tags = VCFLACDict() self.metadata_blocks.append(self.tags) else: raise FLACVorbisError("a Vorbis comment already exists") add_vorbiscomment = add_tags def delete(self, filename=None): """Remove Vorbis comments from a file. If no filename is given, the one most recently loaded is used. """ if filename is None: filename = self.filename for s in list(self.metadata_blocks): if isinstance(s, VCFLACDict): self.metadata_blocks.remove(s) self.tags = None self.save() break vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.") def load(self, filename): """Load file information from a filename.""" self.metadata_blocks = [] self.tags = None self.cuesheet = None self.seektable = None self.filename = filename fileobj = file(filename, "rb") try: self.__check_header(fileobj) while self.__read_metadata_block(fileobj): pass if fileobj.read(2) not in ["\xff\xf8", "\xff\xf9"]: raise FLACNoHeaderError("End of metadata did not start audio") finally: fileobj.close() try: self.metadata_blocks[0].length except (AttributeError, IndexError): raise FLACNoHeaderError("Stream info block not found") info = property(lambda s: s.metadata_blocks[0]) def add_picture(self, picture): """Add a new picture to the file.""" self.metadata_blocks.append(picture) def clear_pictures(self): """Delete all pictures from the file.""" self.metadata_blocks = filter(lambda b: b.code != Picture.code, self.metadata_blocks) def __get_pictures(self): return filter(lambda b: b.code == Picture.code, self.metadata_blocks) pictures = property(__get_pictures, doc="List of embedded pictures") def save(self, filename=None, deleteid3=False): """Save metadata blocks to a file. If no filename is given, the one most recently loaded is used. """ if filename is None: filename = self.filename f = open(filename, 'rb+') # Ensure we've got padding at the end, and only at the end. # If adding makes it too large, we'll scale it down later. self.metadata_blocks.append(Padding('\x00' * 1020)) MetadataBlock.group_padding(self.metadata_blocks) header = self.__check_header(f) available = self.__find_audio_offset(f) - header # "fLaC" and maybe ID3 data = MetadataBlock.writeblocks(self.metadata_blocks) # Delete ID3v2 if deleteid3 and header > 4: available += header - 4 header = 4 if len(data) > available: # If we have too much data, see if we can reduce padding. padding = self.metadata_blocks[-1] newlength = padding.length - (len(data) - available) if newlength > 0: padding.length = newlength data = MetadataBlock.writeblocks(self.metadata_blocks) assert len(data) == available elif len(data) < available: # If we have too little data, increase padding. self.metadata_blocks[-1].length += (available - len(data)) data = MetadataBlock.writeblocks(self.metadata_blocks) assert len(data) == available if len(data) != available: # We couldn't reduce the padding enough. diff = (len(data) - available) insert_bytes(f, diff, header) f.seek(header - 4) f.write("fLaC" + data) # Delete ID3v1 if deleteid3: try: f.seek(-128, 2) except IOError: pass else: if f.read(3) == "TAG": f.seek(-128, 2) f.truncate() def __find_audio_offset(self, fileobj): byte = 0x00 while not (byte >> 7) & 1: byte = ord(fileobj.read(1)) size = to_int_be(fileobj.read(3)) fileobj.read(size) return fileobj.tell() def __check_header(self, fileobj): size = 4 header = fileobj.read(4) if header != "fLaC": size = None if header[:3] == "ID3": size = 14 + BitPaddedInt(fileobj.read(6)[2:]) fileobj.seek(size - 4) if fileobj.read(4) != "fLaC": size = None if size is None: raise FLACNoHeaderError( "%r is not a valid FLAC file" % fileobj.name) return size Open = FLAC def delete(filename): """Remove tags from a file.""" FLAC(filename).delete()