From be330afc54468511b46ce1120ad00bb61fcb6c1f Mon Sep 17 00:00:00 2001 From: Patrick Speiser Date: Sat, 8 Sep 2012 13:51:39 +0200 Subject: [PATCH 1/6] Added new valid include to browse_recordings --- lib/musicbrainzngs/musicbrainz.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/musicbrainzngs/musicbrainz.py b/lib/musicbrainzngs/musicbrainz.py index b0e94fed..e16c0816 100644 --- a/lib/musicbrainzngs/musicbrainz.py +++ b/lib/musicbrainzngs/musicbrainz.py @@ -552,6 +552,7 @@ def _do_mb_query(entity, id, includes=[], params={}): # Build the endpoint components. path = '%s/%s' % (entity, id) + logging.debug(str(path)) return _mb_request(path, 'GET', auth_required, args=args) def _do_mb_search(entity, query='', fields={}, @@ -776,7 +777,7 @@ def browse_recordings(artist=None, release=None, includes=[], limit=None, offset def browse_releases(artist=None, label=None, recording=None, release_group=None, release_status=[], release_type=[], includes=[], limit=None, offset=None): # track_artist param doesn't work yet - valid_includes = ["artist-credits", "labels", "recordings", "release-groups"] + valid_includes = ["artist-credits", "labels", "recordings", "release-groups","media"] params = {"artist": artist, "label": label, "recording": recording, From 0445859fb44280901277ddacbc42417c6a066ee4 Mon Sep 17 00:00:00 2001 From: Patrick Speiser Date: Sat, 8 Sep 2012 13:52:34 +0200 Subject: [PATCH 2/6] Initial Prototype for get_all_releases --- headphones/mb.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/headphones/mb.py b/headphones/mb.py index 36eeeae8..b7307b49 100644 --- a/headphones/mb.py +++ b/headphones/mb.py @@ -367,6 +367,63 @@ def getRelease(releaseid, include_artist_info=True): release['tracks'] = tracks return release +def get_all_releases(releasegroup,include_artist_info=True): + results = None + q, sleepytime = startmb() + try: + if include_artist_info: + results = musicbrainzngs.browse_releases(release_group=releasegroup,includes=['artist-credits','labels','recordings','release-groups','media']) + else: + results = musicbrainzngs.browse_releases(release_group=releasegroup,includes=['labels','recordings','release-groups','media']) + except WebServiceError, e: + logger.warn('Attempt to retrieve information from MusicBrainz for releasegroup "%s" failed (%s)' % (releasegroup, str(e))) + time.sleep(5) + + if not results or 'release-list' not in results: + return False + + results = results['release-list'] + releases = [] + for releasedata in results: + release = {} + release['AlbumASIN'] = unicode(releasedata['asin']) + release['AlbumID'] = unicode(releasedata['release-group']['id']) + release['Type'] = unicode(releasedata['release-group']['type']) + release['AlbumTitle'] = unicode(releasedata['title']) + #making the assumption that the most important artist will be first in the list + if include_artist_info: + release['ArtistID'] = unicode(releasedata['artist-credit'][0]['artist']['id']) + release['ArtistName'] = unicode(releasedata['artist-credit-phrase']) + release['ReleaseCountry'] = unicode(releasedata['country']) + release['ReleaseDate'] = unicode(releasedata['date']) + #assuming that the list will contain media at all and that the format will be consistent + try: + release['ReleaseFormat'] = unicode(releasedata['medium-list'][0]['format']) + except: + release['ReleaseFormat'] = u'Unknown' + release['ReleaseID'] = releasedata['id'] + + #pasted in from getRelease + totalTracks = 1 + tracks = [] + for medium in releasedata['medium-list']: + for track in medium['track-list']: + tracks.append({ + 'number': totalTracks, + 'title': unicode(track['recording']['title']), + 'id': unicode(track['recording']['id']), + 'url': u"http://musicbrainz.org/track/" + track['recording']['id'], + 'duration': int(track['length']) if 'length' in track else 0 + }) + totalTracks += 1 + release['Tracks'] = tracks + releases.append(release) + + + + + + return releases # Used when there is a disambiguation def findArtistbyAlbum(name): From b71bb6938df5e836067970a4999486610dc6e6a9 Mon Sep 17 00:00:00 2001 From: Patrick Speiser Date: Sat, 8 Sep 2012 16:13:33 +0200 Subject: [PATCH 3/6] get_all_releases now produces identical output to the final contents of the fullreleaselist in addArtisttoDB --- headphones/mb.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/headphones/mb.py b/headphones/mb.py index b7307b49..febd423e 100644 --- a/headphones/mb.py +++ b/headphones/mb.py @@ -367,41 +367,50 @@ def getRelease(releaseid, include_artist_info=True): release['tracks'] = tracks return release -def get_all_releases(releasegroup,include_artist_info=True): +def get_all_releases(rgid): results = None q, sleepytime = startmb() try: - if include_artist_info: - results = musicbrainzngs.browse_releases(release_group=releasegroup,includes=['artist-credits','labels','recordings','release-groups','media']) - else: - results = musicbrainzngs.browse_releases(release_group=releasegroup,includes=['labels','recordings','release-groups','media']) + results = musicbrainzngs.browse_releases(release_group=rgid,includes=['artist-credits','labels','recordings','release-groups','media']) except WebServiceError, e: - logger.warn('Attempt to retrieve information from MusicBrainz for releasegroup "%s" failed (%s)' % (releasegroup, str(e))) - time.sleep(5) + logger.warn('Attempt to retrieve information from MusicBrainz for release group "%s" failed (%s)' % (rgid, str(e))) + time.sleep(5) + return False if not results or 'release-list' not in results: return False results = results['release-list'] + releases = [] for releasedata in results: release = {} - release['AlbumASIN'] = unicode(releasedata['asin']) - release['AlbumID'] = unicode(releasedata['release-group']['id']) - release['Type'] = unicode(releasedata['release-group']['type']) release['AlbumTitle'] = unicode(releasedata['title']) + release['AlbumID'] = unicode(rgid) + release['AlbumASIN'] = unicode(releasedata['asin']) + release['ReleaseDate'] = unicode(releasedata['date']) + release['ReleaseID'] = releasedata['id'] + if 'release-group' not in releasedata: + raise Exception('No release group associated with release id ' + releasedata['id'] + ' album id' + rgid) + release['Type'] = unicode(releasedata['release-group']['type']) + + #making the assumption that the most important artist will be first in the list - if include_artist_info: + if 'artist-credit' in releasedata: release['ArtistID'] = unicode(releasedata['artist-credit'][0]['artist']['id']) release['ArtistName'] = unicode(releasedata['artist-credit-phrase']) - release['ReleaseCountry'] = unicode(releasedata['country']) - release['ReleaseDate'] = unicode(releasedata['date']) - #assuming that the list will contain media at all and that the format will be consistent + else: + raise Exception('Release ' + releasedata['id'] + ' has no Artists associated.') + + + release['ReleaseCountry'] = unicode(releasedata['country']) if 'country' in releasedata else u'Unknown' + #assuming that the list will contain media and that the format will be consistent try: release['ReleaseFormat'] = unicode(releasedata['medium-list'][0]['format']) except: release['ReleaseFormat'] = u'Unknown' - release['ReleaseID'] = releasedata['id'] + + #pasted in from getRelease totalTracks = 1 From fe8220a000d679869e4f1d80a6dfb996d4f97512 Mon Sep 17 00:00:00 2001 From: Patrick Speiser Date: Sat, 8 Sep 2012 18:54:11 +0200 Subject: [PATCH 4/6] Improved addArtisttoDB, it now uses get_all_releases(releasegroupid) to get the data. This has reduced the number of calls to the musicbrainz database by 90% in a test where i added U2, i expect similar effects for all artists. --- headphones/importer.py | 70 ++++++++++++++++-------------------------- headphones/mb.py | 7 +++-- 2 files changed, 31 insertions(+), 46 deletions(-) diff --git a/headphones/importer.py b/headphones/importer.py index d8ceb281..b25eb6f5 100644 --- a/headphones/importer.py +++ b/headphones/importer.py @@ -167,69 +167,51 @@ def addArtisttoDB(artistid, extrasonly=False): # check if the album already exists rg_exists = myDB.action("SELECT * from albums WHERE AlbumID=?", [rg['id']]).fetchone() - try: - releaselist = mb.getReleaseGroup(rgid) - except Exception, e: - logger.info('Unable to get release information for %s - there may not be any official releases in this release group' % rg['title']) - continue - - if not releaselist: + releases = mb.get_all_releases(rgid) + if not releases: errors = True + logger.info('Unable to get release information for %s - there may not be any official releases in this release group' % rg['title']) continue # This will be used later to build a hybrid release fullreleaselist = [] - - for release in releaselist: + + for release in releases: # What we're doing here now is first updating the allalbums & alltracks table to the most # current info, then moving the appropriate release into the album table and its associated # tracks into the tracks table - - releaseid = release['id'] - - try: - releasedict = mb.getRelease(releaseid, include_artist_info=False) - except Exception, e: - errors = True - logger.info('Unable to get release information for %s: %s' % (release['id'], e)) - continue - - if not releasedict: - errors = True - continue + controlValueDict = {"ReleaseID" : release['ReleaseID']} - controlValueDict = {"ReleaseID": release['id']} - - newValueDict = {"ArtistID": artistid, - "ArtistName": artist['artist_name'], - "AlbumTitle": rg['title'], - "AlbumID": rg['id'], - "AlbumASIN": releasedict['asin'], - "ReleaseDate": releasedict['date'], - "Type": rg['type'], - "ReleaseCountry": releasedict['country'], - "ReleaseFormat": releasedict['format'] + newValueDict = {"ArtistID": release['ArtistID'], + "ArtistName": release['ArtistName'], + "AlbumTitle": release['AlbumTitle'], + "AlbumID": release['AlbumID'], + "AlbumASIN": release['AlbumASIN'], + "ReleaseDate": release['ReleaseDate'], + "Type": release['Type'], + "ReleaseCountry": release['ReleaseCountry'], + "ReleaseFormat": release['ReleaseFormat'] } - + myDB.upsert("allalbums", newValueDict, controlValueDict) # Build the dictionary for the fullreleaselist - newValueDict['ReleaseID'] = release['id'] - newValueDict['Tracks'] = releasedict['tracks'] + newValueDict['ReleaseID'] = release['ReleaseID'] + newValueDict['Tracks'] = release['Tracks'] fullreleaselist.append(newValueDict) - for track in releasedict['tracks']: + for track in release['Tracks']: cleanname = helpers.cleanName(artist['artist_name'] + ' ' + rg['title'] + ' ' + track['title']) controlValueDict = {"TrackID": track['id'], - "ReleaseID": release['id']} + "ReleaseID": release['ReleaseID']} - newValueDict = {"ArtistID": artistid, - "ArtistName": artist['artist_name'], - "AlbumTitle": rg['title'], - "AlbumASIN": releasedict['asin'], - "AlbumID": rg['id'], + newValueDict = {"ArtistID": release['ArtistID'], + "ArtistName": release['ArtistName'], + "AlbumTitle": release['AlbumTitle'], + "AlbumID": release['AlbumID'], + "AlbumASIN": release['AlbumASIN'], "TrackTitle": track['title'], "TrackDuration": track['duration'], "TrackNumber": track['number'], @@ -584,6 +566,8 @@ def getHybridRelease(fullreleaselist): """ Returns a dictionary of best group of tracks from the list of releases & earliest release date """ + if len(fullreleaselist) == 0: + raise Exception("getHybridRelease was called with an empty fullreleaselist") sortable_release_list = [] for release in fullreleaselist: diff --git a/headphones/mb.py b/headphones/mb.py index febd423e..b9a629ac 100644 --- a/headphones/mb.py +++ b/headphones/mb.py @@ -387,8 +387,8 @@ def get_all_releases(rgid): release = {} release['AlbumTitle'] = unicode(releasedata['title']) release['AlbumID'] = unicode(rgid) - release['AlbumASIN'] = unicode(releasedata['asin']) - release['ReleaseDate'] = unicode(releasedata['date']) + release['AlbumASIN'] = unicode(releasedata['asin']) if 'asin' in releasedata else None + release['ReleaseDate'] = unicode(releasedata['date']) if 'date' in releasedata else None release['ReleaseID'] = releasedata['id'] if 'release-group' not in releasedata: raise Exception('No release group associated with release id ' + releasedata['id'] + ' album id' + rgid) @@ -400,7 +400,8 @@ def get_all_releases(rgid): release['ArtistID'] = unicode(releasedata['artist-credit'][0]['artist']['id']) release['ArtistName'] = unicode(releasedata['artist-credit-phrase']) else: - raise Exception('Release ' + releasedata['id'] + ' has no Artists associated.') + logger.warn('Release ' + releasedata['id'] + ' has no Artists associated.') + return False release['ReleaseCountry'] = unicode(releasedata['country']) if 'country' in releasedata else u'Unknown' From ea9f896310eacb24cfdd539c904fb263ef15df2c Mon Sep 17 00:00:00 2001 From: Patrick Speiser Date: Sun, 9 Sep 2012 07:02:59 +0200 Subject: [PATCH 5/6] Added paging to get_all_releases, this will ensure that any future (however unlikely) release groups containing over 100 releases will be read correctly --- headphones/mb.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/headphones/mb.py b/headphones/mb.py index b9a629ac..dd161633 100644 --- a/headphones/mb.py +++ b/headphones/mb.py @@ -368,19 +368,26 @@ def getRelease(releaseid, include_artist_info=True): return release def get_all_releases(rgid): - results = None + results = [] q, sleepytime = startmb() try: - results = musicbrainzngs.browse_releases(release_group=rgid,includes=['artist-credits','labels','recordings','release-groups','media']) + limit = 100 + newResults = None + while newResults == None or len(newResults) >= limit: + newResults = musicbrainzngs.browse_releases(release_group=rgid,includes=['artist-credits','labels','recordings','release-groups','media'],limit=limit,offset=len(results)) + if 'release-list' not in newResults: + break #may want to raise an exception here instead ? + newResults = newResults['release-list'] + results += newResults + except WebServiceError, e: logger.warn('Attempt to retrieve information from MusicBrainz for release group "%s" failed (%s)' % (rgid, str(e))) time.sleep(5) return False - if not results or 'release-list' not in results: + if not results or len(results) == 0: return False - results = results['release-list'] releases = [] for releasedata in results: From 11946b80da2d07d1c730e44c484b70eda1088894 Mon Sep 17 00:00:00 2001 From: Patrick Speiser Date: Sun, 9 Sep 2012 07:14:13 +0200 Subject: [PATCH 6/6] Revert "Possible Fix for Issue #533" This reverts commit 196ea11cedc3719b1835c27638e92e44c3a57770. --- headphones/webstart.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/headphones/webstart.py b/headphones/webstart.py index c8f53e9a..b16d6765 100644 --- a/headphones/webstart.py +++ b/headphones/webstart.py @@ -31,9 +31,6 @@ def initialize(options={}): 'server.socket_port': options['http_port'], 'server.socket_host': options['http_host'], 'engine.autoreload_on': False, - 'tools.encode.on' : True, - 'tools.encode.encoding' : 'utf-8', - 'tools.decode.on' : True, }) conf = {