From 21e1c100e7c2d253ddbafd2a87e4286ba7479e6a Mon Sep 17 00:00:00 2001
From: Bas Stottelaar <basstottelaar@gmail.com>
Date: Sun, 6 Apr 2014 23:30:04 +0200
Subject: [PATCH] Refactored Last.FM code

---
 headphones/cache.py   |  66 ++++----------
 headphones/helpers.py |   3 +-
 headphones/lastfm.py  | 207 +++++++++++++++---------------------------
 3 files changed, 90 insertions(+), 186 deletions(-)

diff --git a/headphones/cache.py b/headphones/cache.py
index 064c4166..89754455 100644
--- a/headphones/cache.py
+++ b/headphones/cache.py
@@ -19,7 +19,7 @@ import glob, urllib
 import lib.simplejson as simplejson
 
 import headphones
-from headphones import db, helpers, logger
+from headphones import db, helpers, logger, lastfm
 
 lastfm_apikey = "690e1ed3bc00bc91804cd8f7fe5ed6d4"
 
@@ -210,15 +210,7 @@ class Cache(object):
         if ArtistID:
 
             self.id_type = 'artist'
-
-            params = {  "method": "artist.getInfo",
-                        "api_key": lastfm_apikey,
-                        "mbid": ArtistID,
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/"
-            data = helpers.request_json(url, params=params, timeout=20)
+            data = lastfm.request_lastfm("artist.getinfo", mbid=ArtistID, api_key=lastfm_apikey)
 
             if not data:
                 return
@@ -226,25 +218,17 @@ class Cache(object):
             try:
                 image_url = data['artist']['image'][-1]['#text']
             except Exception:
-                logger.debug('No artist image found on url: %s', url)
+                logger.debug('No artist image found')
                 image_url = None
 
             thumb_url = self._get_thumb_url(data)
             if not thumb_url:
-                logger.debug('No artist thumbnail image found on url: %s', url)
+                logger.debug('No artist thumbnail image found')
 
         else:
 
             self.id_type = 'album'
-
-            params = {  "method": "album.getInfo",
-                        "api_key": lastfm_apikey,
-                        "mbid": AlbumID,
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
-            data = helpers.request_json(url, params=params, timeout=20)
+            data = lastfm.request_lastfm("album.getinfo", mbid=AlbumID, api_key=lastfm_apikey)
 
             if not data:
                 return
@@ -252,13 +236,13 @@ class Cache(object):
             try:
                 image_url = data['artist']['image'][-1]['#text']
             except Exception:
-                logger.debug('No artist image found on url: %s', url)
+                logger.debug('No artist image found')
                 image_url = None
 
             thumb_url = self._get_thumb_url(data)
 
             if not thumb_url:
-                logger.debug('No artist thumbnail image found on url: %s', url)
+                logger.debug('No artist thumbnail image found')
 
         return {'artwork' : image_url, 'thumbnail' : thumb_url }
 
@@ -271,14 +255,7 @@ class Cache(object):
         # Since lastfm uses release ids rather than release group ids for albums, we have to do a artist + album search for albums
         if self.id_type == 'artist':
 
-            params = {  "method": "artist.getInfo",
-                        "api_key": lastfm_apikey,
-                        "mbid": self.id,
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/"
-            data = helpers.request_json(url, timeout=20, params=params)
+            data = lastfm.request_lastfm("artist.getinfo", mbid=self.id, api_key=lastfm_apikey)
 
             if not data:
                 return
@@ -286,36 +263,27 @@ class Cache(object):
             try:
                 self.info_summary = data['artist']['bio']['summary']
             except Exception:
-                logger.debug('No artist bio summary found on url: %s', url)
+                logger.debug('No artist bio summary found')
                 self.info_summary = None
             try:
                 self.info_content = data['artist']['bio']['content']
             except Exception:
-                logger.debug('No artist bio found on url: %s', url)
+                logger.debug('No artist bio found')
                 self.info_content = None
             try:
                 image_url = data['artist']['image'][-1]['#text']
             except Exception:
-                logger.debug('No artist image found on url: %s', url)
+                logger.debug('No artist image found')
                 image_url = None
 
             thumb_url = self._get_thumb_url(data)
             if not thumb_url:
-                logger.debug('No artist thumbnail image found on url: %s', url)
+                logger.debug('No artist thumbnail image found')
 
         else:
 
             dbartist = myDB.action('SELECT ArtistName, AlbumTitle FROM albums WHERE AlbumID=?', [self.id]).fetchone()
-
-            params = {  "method": "album.getInfo",
-                        "api_key": lastfm_apikey,
-                        "artist": dbartist['ArtistName'].encode('utf-8'),
-                        "album": dbartist['AlbumTitle'].encode('utf-8'),
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/"
-            data = helpers.request_json(url, timeout=20, params=params)
+            data = lastfm.request_lastfm("album.getinfo", artist=dbartist['ArtistName'], album=dbartist['AlbumTitle'], api_key=lastfm_apikey)
 
             if not data:
                 return
@@ -323,23 +291,23 @@ class Cache(object):
             try:
                 self.info_summary = data['album']['wiki']['summary']
             except Exception:
-                logger.debug('No album summary found from: %s', url)
+                logger.debug('No album summary found')
                 self.info_summary = None
             try:
                 self.info_content = data['album']['wiki']['content']
             except Exception:
-                logger.debug('No album infomation found from: %s', url)
+                logger.debug('No album infomation found')
                 self.info_content = None
             try:
                 image_url = data['album']['image'][-1]['#text']
             except Exception:
-                logger.debug('No album image link found on url: %s', url)
+                logger.debug('No album image link found')
                 image_url = None
 
             thumb_url = self._get_thumb_url(data)
 
             if not thumb_url:
-                logger.debug('No album thumbnail image found on url: %s', url)
+                logger.debug('No album thumbnail image found')
 
         #Save the content & summary to the database no matter what if we've opened up the url
         if self.id_type == 'artist':
diff --git a/headphones/helpers.py b/headphones/helpers.py
index b18cda6b..f0f6d363 100644
--- a/headphones/helpers.py
+++ b/headphones/helpers.py
@@ -646,10 +646,11 @@ def request_soup(url, **kwargs):
     no exceptions are raised.
     """
 
+    parser = kwargs.pop("parser", "html5lib")
     response = request_response(url, **kwargs)
 
     if response is not None:
-        return BeautifulSoup(response.content, "html5lib")
+        return BeautifulSoup(response.content, parser)
 
 def request_minidom(url, **kwargs):
     """
diff --git a/headphones/lastfm.py b/headphones/lastfm.py
index 71ded140..65fec683 100644
--- a/headphones/lastfm.py
+++ b/headphones/lastfm.py
@@ -21,47 +21,60 @@ from headphones import db, logger, helpers
 
 from collections import defaultdict
 
+ENTRY_POINT = 'http://ws.audioscrobbler.com/2.0/'
 API_KEY = '395e6ec6bb557382fc41fde867bce66f'
 
+def request_lastfm(method, **kwargs):
+    """
+    Call a Last.FM API method. Automatically sets the method and API key. Method
+    will return the result if no error occurred.
+
+    By default, this method will request the JSON format, since it is lighter
+    than XML.
+    """
+
+    # Prepare request
+    kwargs["method"] = method
+    kwargs.setdefault("api_key", API_KEY)
+    kwargs.setdefault("format", "json")
+
+    # Send request
+    logger.debug("Calling Last.FM method: %s", method)
+    data = helpers.request_json(ENTRY_POINT, timeout=20, params=kwargs)
+
+    # Parse response and check for errors.
+    if not data:
+        logger.error("Error calling Last.FM method: %s", method)
+        return
+
+    if "error" in data:
+        logger.debug("Last.FM returned an error: %s", data["message"])
+        return
+
+    return data
+
 def getSimilar():
     myDB = db.DBConnection()
     results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC')
 
+    logger.info("Fetching similar artists from Last.FM for tag cloud")
     artistlist = []
 
     for result in results[:12]:
-        params = {
-            "method": "artist.getsimilar",
-            "mbid": result['ArtistID'],
-            "api_key": API_KEY
-        }
+        data = request_lastfm("artist.getsimilar", mbid=result['ArtistId'])
 
-        url = 'http://ws.audioscrobbler.com/2.0/'
-        dom = request_minidom(url, timeout=20, params=params)
+        if data and "similarartists" in data:
+            artists = data["similarartists"]["artist"]
 
-        if not dom:
-            logger.debug("Could not parse similar artist data from Last.FM")
-            continue
+            for artist in artists:
+                artist_mbid = artist["mbid"]
+                artist_name = artist["name"]
 
-        artists = dom.getElementsByTagName("artist")
-        logger.debug("Fetched %d artists from Last.FM", len(artists))
-
-        for artist in artists:
-            namenode = artist.getElementsByTagName("name")[0].childNodes
-            mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
-
-            for node in namenode:
-                artist_name = node.data
-            for node in mbidnode:
-                artist_mbid = node.data
-
-            try:
                 if not any(artist_mbid in x for x in results):
                     artistlist.append((artist_name, artist_mbid))
-            except Exception:
-                logger.exception("Unhandled exception")
-                continue
 
+    # Add new artists to tag cloud
+    logger.debug("Fetched %d artists from Last.FM", len(artistlist))
     count = defaultdict(int)
 
     for artist, mbid in artistlist:
@@ -72,143 +85,65 @@ def getSimilar():
 
     random.shuffle(top_list)
 
-    myDB.action('''DELETE from lastfmcloud''')
-    for tuple in top_list:
-        artist_name, artist_mbid = tuple[0]
-        count = tuple[1]
+    myDB.action("DELETE from lastfmcloud")
+    for item in top_list:
+        artist_name, artist_mbid = item[0]
+        count = item[1]
+
         myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count])
 
+    logger.debug("Inserted %d artists into Last.FM tag cloud", len(top_list))
+
 def getArtists():
     myDB = db.DBConnection()
     results = myDB.select('SELECT ArtistID from artists')
 
     if not headphones.LASTFM_USERNAME:
-        logger.warn("Last.FM username not set")
+        logger.warn("Last.FM username not set, not importing artists.")
         return
 
-    params = {
-        "method": "library.getartists",
-        "limit": 10000,
-        "api_key": API_KEY,
-        "user": headphones.LASTFM_USERNAME
-    }
+    logger.info("Fetching artists from Last.FM for username: %s", headphones.LASTFM_USERNAME)
+    data = request_lastfm("library.getartists", limit=10000, user=headphones.LASTFM_USERNAME)
 
-    url = 'http://ws.audioscrobbler.com/2.0/'
-    dom = request_minidom(url, timeout=20, params=params)
+    if data and "artists" in data:
+        artistlist = []
+        artists = data["artists"]["artist"]
+        logger.debug("Fetched %d artists from Last.FM", len(artists))
 
-    if not dom:
-        logger.debug("Could not parse artist list from Last.FM")
-        return
+        for artist in artists:
+            artist_mbid = artist["mbid"]
 
-    artists = dom.getElementsByTagName("artist")
-    logger.debug("Fetched %d artists from Last.FM", len(artists))
-
-    artistlist = []
-
-    for artist in artists:
-        mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
-
-        for node in mbidnode:
-            artist_mbid = node.data
-
-        try:
             if not any(artist_mbid in x for x in results):
                 artistlist.append(artist_mbid)
-        except Exception:
-            logger.exception("Unhandled exception")
-            continue
 
-    from headphones import importer
+        from headphones import importer
 
-    for artistid in artistlist:
-        importer.addArtisttoDB(artistid)
+        for artistid in artistlist:
+            importer.addArtisttoDB(artistid)
 
-    logger.info("Imported %d new artists from Last.FM", len(artistid))
+        logger.info("Imported %d new artists from Last.FM", len(artistlist))
 
 def getTagTopArtists(tag, limit=50):
     myDB = db.DBConnection()
     results = myDB.select('SELECT ArtistID from artists')
 
-    params = {
-        "method": "tag.gettopartists",
-        "limit": limit,
-        "tag": tag,
-        "api_key": API_KEY
-    }
+    logger.info("Fetching top artists from Last.FM for tag: %s", tag)
+    data = request_lastfm("tag.gettopartists", limit=limit, tag=tag)
 
-    url = 'http://ws.audioscrobbler.com/2.0/'
-    dom = request_minidom(url, timeout=20, params=param)
+    if data and "topartists" in data:
+        artistlist = []
+        artists = data["topartists"]["artist"]
+        logger.debug("Fetched %d artists from Last.FM", len(artists))
 
-    if not dom:
-        logger.debug("Could not parse artist list from Last.FM")
-        return
+        for artist in artists:
+            artist_mbid = artist["mbid"]
 
-    artists = d.getElementsByTagName("artist")
-    logger.debug("Fetched %d artists from Last.FM", len(artists))
-
-    artistlist = []
-
-    for artist in artists:
-        mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
-
-        for node in mbidnode:
-            artist_mbid = node.data
-
-        try:
             if not any(artist_mbid in x for x in results):
                 artistlist.append(artist_mbid)
-        except Exception:
-            logger.exception("Unhandled exception")
-            continue
 
-    from headphones import importer
+        from headphones import importer
 
-    for artistid in artistlist:
-        importer.addArtisttoDB(artistid)
+        for artistid in artistlist:
+            importer.addArtisttoDB(artistid)
 
-def getAlbumDescription(rgid, artist, album):
-    myDB = db.DBConnection()
-    result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid])
-
-    if result:
-        logger.info("No summary found for release group id: %s", rgid)
-        return
-
-    params = {
-        "method": 'album.getInfo',
-        "api_key": api_key,
-        "artist": artist.encode('utf-8'),
-        "album": album.encode('utf-8')
-    }
-
-    url = 'http://ws.audioscrobbler.com/2.0/'
-    dom = helpers.request_minidom(url, timeout=20, params=params)
-
-    if not dom:
-        logger.debug("Could not parse album description from Last.FM")
-        return
-
-    if dom.getElementsByTagName("error"):
-        logger.debug("Last.FM returned error")
-        return
-
-    albuminfo = dom.getElementsByTagName("album")
-    logger.debug("Fetched %d albums from Last.FM", len(artists))
-
-    for item in albuminfo:
-        try:
-            summarynode = item.getElementsByTagName("summary")[0].childNodes
-            contentnode = item.getElementsByTagName("content")[0].childNodes
-
-            for node in summarynode:
-                summary = node.data
-            for node in contentnode:
-                content = node.data
-
-            controlValueDict = {'ReleaseGroupID': rgid}
-            newValueDict = {'Summary': summary,
-                            'Content': content}
-            myDB.upsert("descriptions", newValueDict, controlValueDict)
-        except:
-            logger.exception("Unhandled exception")
-            return
\ No newline at end of file
+        logger.debug("Added %d new artists from Last.FM", len(artistlist))
\ No newline at end of file