From 21e1c100e7c2d253ddbafd2a87e4286ba7479e6a Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Sun, 6 Apr 2014 23:30:04 +0200 Subject: [PATCH] Refactored Last.FM code --- headphones/cache.py | 66 ++++---------- headphones/helpers.py | 3 +- headphones/lastfm.py | 207 +++++++++++++++--------------------------- 3 files changed, 90 insertions(+), 186 deletions(-) diff --git a/headphones/cache.py b/headphones/cache.py index 064c4166..89754455 100644 --- a/headphones/cache.py +++ b/headphones/cache.py @@ -19,7 +19,7 @@ import glob, urllib import lib.simplejson as simplejson import headphones -from headphones import db, helpers, logger +from headphones import db, helpers, logger, lastfm lastfm_apikey = "690e1ed3bc00bc91804cd8f7fe5ed6d4" @@ -210,15 +210,7 @@ class Cache(object): if ArtistID: self.id_type = 'artist' - - params = { "method": "artist.getInfo", - "api_key": lastfm_apikey, - "mbid": ArtistID, - "format": "json" - } - - url = "http://ws.audioscrobbler.com/2.0/" - data = helpers.request_json(url, params=params, timeout=20) + data = lastfm.request_lastfm("artist.getinfo", mbid=ArtistID, api_key=lastfm_apikey) if not data: return @@ -226,25 +218,17 @@ class Cache(object): try: image_url = data['artist']['image'][-1]['#text'] except Exception: - logger.debug('No artist image found on url: %s', url) + logger.debug('No artist image found') image_url = None thumb_url = self._get_thumb_url(data) if not thumb_url: - logger.debug('No artist thumbnail image found on url: %s', url) + logger.debug('No artist thumbnail image found') else: self.id_type = 'album' - - params = { "method": "album.getInfo", - "api_key": lastfm_apikey, - "mbid": AlbumID, - "format": "json" - } - - url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params) - data = helpers.request_json(url, params=params, timeout=20) + data = lastfm.request_lastfm("album.getinfo", mbid=AlbumID, api_key=lastfm_apikey) if not data: return @@ -252,13 +236,13 @@ class Cache(object): try: image_url = data['artist']['image'][-1]['#text'] except Exception: - logger.debug('No artist image found on url: %s', url) + logger.debug('No artist image found') image_url = None thumb_url = self._get_thumb_url(data) if not thumb_url: - logger.debug('No artist thumbnail image found on url: %s', url) + logger.debug('No artist thumbnail image found') return {'artwork' : image_url, 'thumbnail' : thumb_url } @@ -271,14 +255,7 @@ class Cache(object): # Since lastfm uses release ids rather than release group ids for albums, we have to do a artist + album search for albums if self.id_type == 'artist': - params = { "method": "artist.getInfo", - "api_key": lastfm_apikey, - "mbid": self.id, - "format": "json" - } - - url = "http://ws.audioscrobbler.com/2.0/" - data = helpers.request_json(url, timeout=20, params=params) + data = lastfm.request_lastfm("artist.getinfo", mbid=self.id, api_key=lastfm_apikey) if not data: return @@ -286,36 +263,27 @@ class Cache(object): try: self.info_summary = data['artist']['bio']['summary'] except Exception: - logger.debug('No artist bio summary found on url: %s', url) + logger.debug('No artist bio summary found') self.info_summary = None try: self.info_content = data['artist']['bio']['content'] except Exception: - logger.debug('No artist bio found on url: %s', url) + logger.debug('No artist bio found') self.info_content = None try: image_url = data['artist']['image'][-1]['#text'] except Exception: - logger.debug('No artist image found on url: %s', url) + logger.debug('No artist image found') image_url = None thumb_url = self._get_thumb_url(data) if not thumb_url: - logger.debug('No artist thumbnail image found on url: %s', url) + logger.debug('No artist thumbnail image found') else: dbartist = myDB.action('SELECT ArtistName, AlbumTitle FROM albums WHERE AlbumID=?', [self.id]).fetchone() - - params = { "method": "album.getInfo", - "api_key": lastfm_apikey, - "artist": dbartist['ArtistName'].encode('utf-8'), - "album": dbartist['AlbumTitle'].encode('utf-8'), - "format": "json" - } - - url = "http://ws.audioscrobbler.com/2.0/" - data = helpers.request_json(url, timeout=20, params=params) + data = lastfm.request_lastfm("album.getinfo", artist=dbartist['ArtistName'], album=dbartist['AlbumTitle'], api_key=lastfm_apikey) if not data: return @@ -323,23 +291,23 @@ class Cache(object): try: self.info_summary = data['album']['wiki']['summary'] except Exception: - logger.debug('No album summary found from: %s', url) + logger.debug('No album summary found') self.info_summary = None try: self.info_content = data['album']['wiki']['content'] except Exception: - logger.debug('No album infomation found from: %s', url) + logger.debug('No album infomation found') self.info_content = None try: image_url = data['album']['image'][-1]['#text'] except Exception: - logger.debug('No album image link found on url: %s', url) + logger.debug('No album image link found') image_url = None thumb_url = self._get_thumb_url(data) if not thumb_url: - logger.debug('No album thumbnail image found on url: %s', url) + logger.debug('No album thumbnail image found') #Save the content & summary to the database no matter what if we've opened up the url if self.id_type == 'artist': diff --git a/headphones/helpers.py b/headphones/helpers.py index b18cda6b..f0f6d363 100644 --- a/headphones/helpers.py +++ b/headphones/helpers.py @@ -646,10 +646,11 @@ def request_soup(url, **kwargs): no exceptions are raised. """ + parser = kwargs.pop("parser", "html5lib") response = request_response(url, **kwargs) if response is not None: - return BeautifulSoup(response.content, "html5lib") + return BeautifulSoup(response.content, parser) def request_minidom(url, **kwargs): """ diff --git a/headphones/lastfm.py b/headphones/lastfm.py index 71ded140..65fec683 100644 --- a/headphones/lastfm.py +++ b/headphones/lastfm.py @@ -21,47 +21,60 @@ from headphones import db, logger, helpers from collections import defaultdict +ENTRY_POINT = 'http://ws.audioscrobbler.com/2.0/' API_KEY = '395e6ec6bb557382fc41fde867bce66f' +def request_lastfm(method, **kwargs): + """ + Call a Last.FM API method. Automatically sets the method and API key. Method + will return the result if no error occured. + + By default, this method will request the JSON format, since it is lighter + than XML. + """ + + # Prepare request + kwargs["method"] = method + kwargs.setdefault("api_key", API_KEY) + kwargs.setdefault("format", "json") + + # Send request + logger.debug("Calling Last.FM method: %s", method) + data = helpers.request_json(ENTRY_POINT, timeout=20, params=kwargs) + + # Parse response and check for errors. + if not data: + logger.error("Error calling Last.FM method: %s", method) + return + + if "error" in data: + logger.debug("Last.FM returned an error: %s", data["message"]) + return + + return data + def getSimilar(): myDB = db.DBConnection() results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC') + logger.info("Fetching similar artists from Last.FM for tag cloud") artistlist = [] for result in results[:12]: - params = { - "method": "artist.getsimilar", - "mbid": result['ArtistID'], - "api_key": API_KEY - } + data = request_lastfm("artist.getsimilar", mbid=result['ArtistId']) - url = 'http://ws.audioscrobbler.com/2.0/' - dom = request_minidom(url, timeout=20, params=params) + if data and "similarartists" in data: + artists = data["similarartists"]["artist"] - if not dom: - logger.debug("Could not parse similar artist data from Last.FM") - continue + for artist in artists: + artist_mbid = artist["mbid"] + artist_name = artist["name"] - artists = dom.getElementsByTagName("artist") - logger.debug("Fetched %d artists from Last.FM", len(artists)) - - for artist in artists: - namenode = artist.getElementsByTagName("name")[0].childNodes - mbidnode = artist.getElementsByTagName("mbid")[0].childNodes - - for node in namenode: - artist_name = node.data - for node in mbidnode: - artist_mbid = node.data - - try: if not any(artist_mbid in x for x in results): artistlist.append((artist_name, artist_mbid)) - except Exception: - logger.exception("Unhandled exception") - continue + # Add new artists to tag cloud + logger.debug("Fetched %d artists from Last.FM", len(artistlist)) count = defaultdict(int) for artist, mbid in artistlist: @@ -72,143 +85,65 @@ def getSimilar(): random.shuffle(top_list) - myDB.action('''DELETE from lastfmcloud''') - for tuple in top_list: - artist_name, artist_mbid = tuple[0] - count = tuple[1] + myDB.action("DELETE from lastfmcloud") + for item in top_list: + artist_name, artist_mbid = item[0] + count = item[1] + myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count]) + logger.debug("Inserted %d artists into Last.FM tag cloud", len(top_list)) + def getArtists(): myDB = db.DBConnection() results = myDB.select('SELECT ArtistID from artists') if not headphones.LASTFM_USERNAME: - logger.warn("Last.FM username not set") + logger.warn("Last.FM username not set, not importing artists.") return - params = { - "method": "library.getartists", - "limit": 10000, - "api_key": API_KEY, - "user": headphones.LASTFM_USERNAME - } + logger.info("Fetching artists from Last.FM for username: %s", headphones.LASTFM_USERNAME) + data = request_lastfm("library.getartists", limit=10000, user=headphones.LASTFM_USERNAME) - url = 'http://ws.audioscrobbler.com/2.0/' - dom = request_minidom(url, timeout=20, params=params) + if data and "artists" in data: + artistlist = [] + artists = data["artists"]["artist"] + logger.debug("Fetched %d artists from Last.FM", len(artists)) - if not dom: - logger.debug("Could not parse artist list from Last.FM") - return + for artist in artists: + artist_mbid = artist["mbid"] - artists = dom.getElementsByTagName("artist") - logger.debug("Fetched %d artists from Last.FM", len(artists)) - - artistlist = [] - - for artist in artists: - mbidnode = artist.getElementsByTagName("mbid")[0].childNodes - - for node in mbidnode: - artist_mbid = node.data - - try: if not any(artist_mbid in x for x in results): artistlist.append(artist_mbid) - except Exception: - logger.exception("Unhandled exception") - continue - from headphones import importer + from headphones import importer - for artistid in artistlist: - importer.addArtisttoDB(artistid) + for artistid in artistlist: + importer.addArtisttoDB(artistid) - logger.info("Imported %d new artists from Last.FM", len(artistid)) + logger.info("Imported %d new artists from Last.FM", len(artistlist)) def getTagTopArtists(tag, limit=50): myDB = db.DBConnection() results = myDB.select('SELECT ArtistID from artists') - params = { - "method": "tag.gettopartists", - "limit": limit, - "tag": tag, - "api_key": API_KEY - } + logger.info("Fetching top artists from Last.FM for tag: %s", tag) + data = request_lastfm("tag.gettopartists", limit=limit, tag=tag) - url = 'http://ws.audioscrobbler.com/2.0/' - dom = request_minidom(url, timeout=20, params=param) + if data and "topartists" in data: + artistlist = [] + artists = data["topartists"]["artist"] + logger.debug("Fetched %d artists from Last.FM", len(artists)) - if not dom: - logger.debug("Could not parse artist list from Last.FM") - return + for artist in artists: + artist_mbid = artist["mbid"] - artists = d.getElementsByTagName("artist") - logger.debug("Fetched %d artists from Last.FM", len(artists)) - - artistlist = [] - - for artist in artists: - mbidnode = artist.getElementsByTagName("mbid")[0].childNodes - - for node in mbidnode: - artist_mbid = node.data - - try: if not any(artist_mbid in x for x in results): artistlist.append(artist_mbid) - except Exception: - logger.exception("Unhandled exception") - continue - from headphones import importer + from headphones import importer - for artistid in artistlist: - importer.addArtisttoDB(artistid) + for artistid in artistlist: + importer.addArtisttoDB(artistid) -def getAlbumDescription(rgid, artist, album): - myDB = db.DBConnection() - result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid]) - - if result: - logger.info("No summary found for release group id: %s", rgid) - return - - params = { - "method": 'album.getInfo', - "api_key": api_key, - "artist": artist.encode('utf-8'), - "album": album.encode('utf-8') - } - - url = 'http://ws.audioscrobbler.com/2.0/' - dom = helpers.request_minidom(url, timeout=20, params=params) - - if not dom: - logger.debug("Could not parse album description from Last.FM") - return - - if dom.getElementsByTagName("error"): - logger.debug("Last.FM returned error") - return - - albuminfo = dom.getElementsByTagName("album") - logger.debug("Fetched %d albums from Last.FM", len(artists)) - - for item in albuminfo: - try: - summarynode = item.getElementsByTagName("summary")[0].childNodes - contentnode = item.getElementsByTagName("content")[0].childNodes - - for node in summarynode: - summary = node.data - for node in contentnode: - content = node.data - - controlValueDict = {'ReleaseGroupID': rgid} - newValueDict = {'Summary': summary, - 'Content': content} - myDB.upsert("descriptions", newValueDict, controlValueDict) - except: - logger.exception("Unhandled exception") - return \ No newline at end of file + logger.debug("Added %d new artists from Last.FM", len(artistlist)) \ No newline at end of file