Files
headphones/beets/autotag/mb.py

391 lines
14 KiB
Python

# This file is part of beets.
# Copyright 2013, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Searches for albums in the MusicBrainz database.
"""
import logging
import musicbrainzngs
import re
import traceback
from urlparse import urljoin
import beets.autotag.hooks
import beets
from beets import util
from beets import config
SEARCH_LIMIT = 5
VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377'
BASE_URL = 'http://musicbrainz.org/'
musicbrainzngs.set_useragent('beets', beets.__version__,
'http://beets.radbox.org/')
class MusicBrainzAPIError(util.HumanReadableException):
"""An error while talking to MusicBrainz. The `query` field is the
parameter to the action and may have any type.
"""
def __init__(self, reason, verb, query, tb=None):
self.query = query
super(MusicBrainzAPIError, self).__init__(reason, verb, tb)
def get_message(self):
return u'"{0}" in {1} with query {2}'.format(
self._reasonstr(), self.verb, repr(self.query)
)
log = logging.getLogger('beets')
RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups',
'labels', 'artist-credits', 'aliases']
TRACK_INCLUDES = ['artists', 'aliases']
def track_url(trackid):
return urljoin(BASE_URL, 'recording/' + trackid)
def album_url(albumid):
return urljoin(BASE_URL, 'release/' + albumid)
def configure():
"""Set up the python-musicbrainz-ngs module according to settings
from the beets configuration. This should be called at startup.
"""
musicbrainzngs.set_hostname(config['musicbrainz']['host'].get(unicode))
musicbrainzngs.set_rate_limit(
config['musicbrainz']['ratelimit_interval'].as_number(),
config['musicbrainz']['ratelimit'].get(int),
)
def _preferred_alias(aliases):
"""Given an list of alias structures for an artist credit, select
and return the user's preferred alias alias or None if no matching
alias is found.
"""
if not aliases:
return
# Only consider aliases that have locales set.
aliases = [a for a in aliases if 'locale' in a]
# Search configured locales in order.
for locale in config['import']['languages'].as_str_seq():
# Find matching primary aliases for this locale.
matches = [a for a in aliases if a['locale'] == locale and 'primary' in a]
# Skip to the next locale if we have no matches
if not matches:
continue
return matches[0]
def _flatten_artist_credit(credit):
"""Given a list representing an ``artist-credit`` block, flatten the
data into a triple of joined artist name strings: canonical, sort, and
credit.
"""
artist_parts = []
artist_sort_parts = []
artist_credit_parts = []
for el in credit:
if isinstance(el, basestring):
# Join phrase.
artist_parts.append(el)
artist_credit_parts.append(el)
artist_sort_parts.append(el)
else:
alias = _preferred_alias(el['artist'].get('alias-list', ()))
# An artist.
if alias:
cur_artist_name = alias['alias']
else:
cur_artist_name = el['artist']['name']
artist_parts.append(cur_artist_name)
# Artist sort name.
if alias:
artist_sort_parts.append(alias['sort-name'])
elif 'sort-name' in el['artist']:
artist_sort_parts.append(el['artist']['sort-name'])
else:
artist_sort_parts.append(cur_artist_name)
# Artist credit.
if 'name' in el:
artist_credit_parts.append(el['name'])
else:
artist_credit_parts.append(cur_artist_name)
return (
''.join(artist_parts),
''.join(artist_sort_parts),
''.join(artist_credit_parts),
)
def track_info(recording, index=None, medium=None, medium_index=None,
medium_total=None):
"""Translates a MusicBrainz recording result dictionary into a beets
``TrackInfo`` object. Three parameters are optional and are used
only for tracks that appear on releases (non-singletons): ``index``,
the overall track number; ``medium``, the disc number;
``medium_index``, the track's index on its medium; ``medium_total``,
the number of tracks on the medium. Each number is a 1-based index.
"""
info = beets.autotag.hooks.TrackInfo(
recording['title'],
recording['id'],
index=index,
medium=medium,
medium_index=medium_index,
medium_total=medium_total,
data_url=track_url(recording['id']),
)
if recording.get('artist-credit'):
# Get the artist names.
info.artist, info.artist_sort, info.artist_credit = \
_flatten_artist_credit(recording['artist-credit'])
# Get the ID and sort name of the first artist.
artist = recording['artist-credit'][0]['artist']
info.artist_id = artist['id']
if recording.get('length'):
info.length = int(recording['length']) / (1000.0)
info.decode()
return info
def _set_date_str(info, date_str, original=False):
"""Given a (possibly partial) YYYY-MM-DD string and an AlbumInfo
object, set the object's release date fields appropriately. If
`original`, then set the original_year, etc., fields.
"""
if date_str:
date_parts = date_str.split('-')
for key in ('year', 'month', 'day'):
if date_parts:
date_part = date_parts.pop(0)
try:
date_num = int(date_part)
except ValueError:
continue
if original:
key = 'original_' + key
setattr(info, key, date_num)
def album_info(release):
"""Takes a MusicBrainz release result dictionary and returns a beets
AlbumInfo object containing the interesting data about that release.
"""
# Get artist name using join phrases.
artist_name, artist_sort_name, artist_credit_name = \
_flatten_artist_credit(release['artist-credit'])
# Basic info.
track_infos = []
index = 0
for medium in release['medium-list']:
disctitle = medium.get('title')
for track in medium['track-list']:
# Basic information from the recording.
index += 1
ti = track_info(
track['recording'],
index,
int(medium['position']),
int(track['position']),
len(medium['track-list']),
)
ti.disctitle = disctitle
# Prefer track data, where present, over recording data.
if track.get('title'):
ti.title = track['title']
if track.get('artist-credit'):
# Get the artist names.
ti.artist, ti.artist_sort, ti.artist_credit = \
_flatten_artist_credit(track['artist-credit'])
ti.artist_id = track['artist-credit'][0]['artist']['id']
if track.get('length'):
ti.length = int(track['length']) / (1000.0)
track_infos.append(ti)
info = beets.autotag.hooks.AlbumInfo(
release['title'],
release['id'],
artist_name,
release['artist-credit'][0]['artist']['id'],
track_infos,
mediums=len(release['medium-list']),
artist_sort=artist_sort_name,
artist_credit=artist_credit_name,
data_source='MusicBrainz',
data_url=album_url(release['id']),
)
info.va = info.artist_id == VARIOUS_ARTISTS_ID
info.asin = release.get('asin')
info.releasegroup_id = release['release-group']['id']
info.country = release.get('country')
info.albumstatus = release.get('status')
# Build up the disambiguation string from the release group and release.
disambig = []
if release['release-group'].get('disambiguation'):
disambig.append(release['release-group'].get('disambiguation'))
if release.get('disambiguation'):
disambig.append(release.get('disambiguation'))
info.albumdisambig = u', '.join(disambig)
# Release type not always populated.
if 'type' in release['release-group']:
reltype = release['release-group']['type']
if reltype:
info.albumtype = reltype.lower()
# Release dates.
release_date = release.get('date')
release_group_date = release['release-group'].get('first-release-date')
if not release_date:
# Fall back if release-specific date is not available.
release_date = release_group_date
_set_date_str(info, release_date, False)
_set_date_str(info, release_group_date, True)
# Label name.
if release.get('label-info-list'):
label_info = release['label-info-list'][0]
if label_info.get('label'):
label = label_info['label']['name']
if label != '[no label]':
info.label = label
info.catalognum = label_info.get('catalog-number')
# Text representation data.
if release.get('text-representation'):
rep = release['text-representation']
info.script = rep.get('script')
info.language = rep.get('language')
# Media (format).
if release['medium-list']:
first_medium = release['medium-list'][0]
info.media = first_medium.get('format')
info.decode()
return info
def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT):
"""Searches for a single album ("release" in MusicBrainz parlance)
and returns an iterator over AlbumInfo objects. May raise a
MusicBrainzAPIError.
The query consists of an artist name, an album name, and,
optionally, a number of tracks on the album.
"""
# Build search criteria.
criteria = {'release': album.lower()}
if artist is not None:
criteria['artist'] = artist.lower()
else:
# Various Artists search.
criteria['arid'] = VARIOUS_ARTISTS_ID
if tracks is not None:
criteria['tracks'] = str(tracks)
# Abort if we have no search terms.
if not any(criteria.itervalues()):
return
try:
res = musicbrainzngs.search_releases(limit=limit, **criteria)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(exc, 'release search', criteria,
traceback.format_exc())
for release in res['release-list']:
# The search result is missing some data (namely, the tracks),
# so we just use the ID and fetch the rest of the information.
albuminfo = album_for_id(release['id'])
if albuminfo is not None:
yield albuminfo
def match_track(artist, title, limit=SEARCH_LIMIT):
"""Searches for a single track and returns an iterable of TrackInfo
objects. May raise a MusicBrainzAPIError.
"""
criteria = {
'artist': artist.lower(),
'recording': title.lower(),
}
if not any(criteria.itervalues()):
return
try:
res = musicbrainzngs.search_recordings(limit=limit, **criteria)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(exc, 'recording search', criteria,
traceback.format_exc())
for recording in res['recording-list']:
yield track_info(recording)
def _parse_id(s):
"""Search for a MusicBrainz ID in the given string and return it. If
no ID can be found, return None.
"""
# Find the first thing that looks like a UUID/MBID.
match = re.search('[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s)
if match:
return match.group()
def album_for_id(albumid):
"""Fetches an album by its MusicBrainz ID and returns an AlbumInfo
object or None if the album is not found. May raise a
MusicBrainzAPIError.
"""
albumid = _parse_id(albumid)
if not albumid:
log.error('Invalid MBID.')
return
try:
res = musicbrainzngs.get_release_by_id(albumid,
RELEASE_INCLUDES)
except musicbrainzngs.ResponseError:
log.debug('Album ID match failed.')
return None
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(exc, 'get release by ID', albumid,
traceback.format_exc())
return album_info(res['release'])
def track_for_id(trackid):
"""Fetches a track by its MusicBrainz ID. Returns a TrackInfo object
or None if no track is found. May raise a MusicBrainzAPIError.
"""
trackid = _parse_id(trackid)
if not trackid:
log.error('Invalid MBID.')
return
try:
res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES)
except musicbrainzngs.ResponseError:
log.debug('Track ID match failed.')
return None
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(exc, 'get recording by ID', trackid,
traceback.format_exc())
return track_info(res['recording'])