Refactored Last.FM code

2026-06-10 12:47:44 +01:00 · 2014-04-06 23:30:04 +02:00
parent 41cb84957e
commit 21e1c100e7
3 changed files with 90 additions and 186 deletions
@@ -19,7 +19,7 @@ import glob, urllib
 import lib.simplejson as simplejson

 import headphones
-from headphones import db, helpers, logger
+from headphones import db, helpers, logger, lastfm

 lastfm_apikey = "690e1ed3bc00bc91804cd8f7fe5ed6d4"

@@ -210,15 +210,7 @@ class Cache(object):
        if ArtistID:

            self.id_type = 'artist'
-
-            params = {  "method": "artist.getInfo",
-                        "api_key": lastfm_apikey,
-                        "mbid": ArtistID,
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/"
-            data = helpers.request_json(url, params=params, timeout=20)
+            data = lastfm.request_lastfm("artist.getinfo", mbid=ArtistID, api_key=lastfm_apikey)

            if not data:
                return
@@ -226,25 +218,17 @@ class Cache(object):
            try:
                image_url = data['artist']['image'][-1]['#text']
            except Exception:
-                logger.debug('No artist image found on url: %s', url)
+                logger.debug('No artist image found')
                image_url = None

            thumb_url = self._get_thumb_url(data)
            if not thumb_url:
-                logger.debug('No artist thumbnail image found on url: %s', url)
+                logger.debug('No artist thumbnail image found')

        else:

            self.id_type = 'album'
-
-            params = {  "method": "album.getInfo",
-                        "api_key": lastfm_apikey,
-                        "mbid": AlbumID,
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
-            data = helpers.request_json(url, params=params, timeout=20)
+            data = lastfm.request_lastfm("album.getinfo", mbid=AlbumID, api_key=lastfm_apikey)

            if not data:
                return
@@ -252,13 +236,13 @@ class Cache(object):
            try:
                image_url = data['artist']['image'][-1]['#text']
            except Exception:
-                logger.debug('No artist image found on url: %s', url)
+                logger.debug('No artist image found')
                image_url = None

            thumb_url = self._get_thumb_url(data)

            if not thumb_url:
-                logger.debug('No artist thumbnail image found on url: %s', url)
+                logger.debug('No artist thumbnail image found')

        return {'artwork' : image_url, 'thumbnail' : thumb_url }

@@ -271,14 +255,7 @@ class Cache(object):
        # Since lastfm uses release ids rather than release group ids for albums, we have to do a artist + album search for albums
        if self.id_type == 'artist':

-            params = {  "method": "artist.getInfo",
-                        "api_key": lastfm_apikey,
-                        "mbid": self.id,
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/"
-            data = helpers.request_json(url, timeout=20, params=params)
+            data = lastfm.request_lastfm("artist.getinfo", mbid=self.id, api_key=lastfm_apikey)

            if not data:
                return
@@ -286,36 +263,27 @@ class Cache(object):
            try:
                self.info_summary = data['artist']['bio']['summary']
            except Exception:
-                logger.debug('No artist bio summary found on url: %s', url)
+                logger.debug('No artist bio summary found')
                self.info_summary = None
            try:
                self.info_content = data['artist']['bio']['content']
            except Exception:
-                logger.debug('No artist bio found on url: %s', url)
+                logger.debug('No artist bio found')
                self.info_content = None
            try:
                image_url = data['artist']['image'][-1]['#text']
            except Exception:
-                logger.debug('No artist image found on url: %s', url)
+                logger.debug('No artist image found')
                image_url = None

            thumb_url = self._get_thumb_url(data)
            if not thumb_url:
-                logger.debug('No artist thumbnail image found on url: %s', url)
+                logger.debug('No artist thumbnail image found')

        else:

            dbartist = myDB.action('SELECT ArtistName, AlbumTitle FROM albums WHERE AlbumID=?', [self.id]).fetchone()
-
-            params = {  "method": "album.getInfo",
-                        "api_key": lastfm_apikey,
-                        "artist": dbartist['ArtistName'].encode('utf-8'),
-                        "album": dbartist['AlbumTitle'].encode('utf-8'),
-                        "format": "json"
-                        }
-
-            url = "http://ws.audioscrobbler.com/2.0/"
-            data = helpers.request_json(url, timeout=20, params=params)
+            data = lastfm.request_lastfm("album.getinfo", artist=dbartist['ArtistName'], album=dbartist['AlbumTitle'], api_key=lastfm_apikey)

            if not data:
                return
@@ -323,23 +291,23 @@ class Cache(object):
            try:
                self.info_summary = data['album']['wiki']['summary']
            except Exception:
-                logger.debug('No album summary found from: %s', url)
+                logger.debug('No album summary found')
                self.info_summary = None
            try:
                self.info_content = data['album']['wiki']['content']
            except Exception:
-                logger.debug('No album infomation found from: %s', url)
+                logger.debug('No album infomation found')
                self.info_content = None
            try:
                image_url = data['album']['image'][-1]['#text']
            except Exception:
-                logger.debug('No album image link found on url: %s', url)
+                logger.debug('No album image link found')
                image_url = None

            thumb_url = self._get_thumb_url(data)

            if not thumb_url:
-                logger.debug('No album thumbnail image found on url: %s', url)
+                logger.debug('No album thumbnail image found')

        #Save the content & summary to the database no matter what if we've opened up the url
        if self.id_type == 'artist':
@@ -646,10 +646,11 @@ def request_soup(url, **kwargs):
    no exceptions are raised.
    """

+    parser = kwargs.pop("parser", "html5lib")
    response = request_response(url, **kwargs)

    if response is not None:
-        return BeautifulSoup(response.content, "html5lib")
+        return BeautifulSoup(response.content, parser)

 def request_minidom(url, **kwargs):
    """
@@ -21,47 +21,60 @@ from headphones import db, logger, helpers

 from collections import defaultdict

+ENTRY_POINT = 'http://ws.audioscrobbler.com/2.0/'
 API_KEY = '395e6ec6bb557382fc41fde867bce66f'

+def request_lastfm(method, **kwargs):
+    """
+    Call a Last.FM API method. Automatically sets the method and API key. Method
+    will return the result if no error occurred.
+
+    By default, this method will request the JSON format, since it is lighter
+    than XML.
+    """
+
+    # Prepare request
+    kwargs["method"] = method
+    kwargs.setdefault("api_key", API_KEY)
+    kwargs.setdefault("format", "json")
+
+    # Send request
+    logger.debug("Calling Last.FM method: %s", method)
+    data = helpers.request_json(ENTRY_POINT, timeout=20, params=kwargs)
+
+    # Parse response and check for errors.
+    if not data:
+        logger.error("Error calling Last.FM method: %s", method)
+        return
+
+    if "error" in data:
+        logger.debug("Last.FM returned an error: %s", data["message"])
+        return
+
+    return data
+
 def getSimilar():
    myDB = db.DBConnection()
    results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC')

+    logger.info("Fetching similar artists from Last.FM for tag cloud")
    artistlist = []

    for result in results[:12]:
-        params = {
-            "method": "artist.getsimilar",
-            "mbid": result['ArtistID'],
-            "api_key": API_KEY
-        }
+        data = request_lastfm("artist.getsimilar", mbid=result['ArtistId'])

-        url = 'http://ws.audioscrobbler.com/2.0/'
-        dom = request_minidom(url, timeout=20, params=params)
+        if data and "similarartists" in data:
+            artists = data["similarartists"]["artist"]

-        if not dom:
-            logger.debug("Could not parse similar artist data from Last.FM")
-            continue
+            for artist in artists:
+                artist_mbid = artist["mbid"]
+                artist_name = artist["name"]

-        artists = dom.getElementsByTagName("artist")
-        logger.debug("Fetched %d artists from Last.FM", len(artists))
-
-        for artist in artists:
-            namenode = artist.getElementsByTagName("name")[0].childNodes
-            mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
-
-            for node in namenode:
-                artist_name = node.data
-            for node in mbidnode:
-                artist_mbid = node.data
-
-            try:
                if not any(artist_mbid in x for x in results):
                    artistlist.append((artist_name, artist_mbid))
-            except Exception:
-                logger.exception("Unhandled exception")
-                continue

+    # Add new artists to tag cloud
+    logger.debug("Fetched %d artists from Last.FM", len(artistlist))
    count = defaultdict(int)

    for artist, mbid in artistlist:
@@ -72,143 +85,65 @@ def getSimilar():

    random.shuffle(top_list)

-    myDB.action('''DELETE from lastfmcloud''')
-    for tuple in top_list:
-        artist_name, artist_mbid = tuple[0]
-        count = tuple[1]
+    myDB.action("DELETE from lastfmcloud")
+    for item in top_list:
+        artist_name, artist_mbid = item[0]
+        count = item[1]
+
        myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count])

+    logger.debug("Inserted %d artists into Last.FM tag cloud", len(top_list))
+
 def getArtists():
    myDB = db.DBConnection()
    results = myDB.select('SELECT ArtistID from artists')

    if not headphones.LASTFM_USERNAME:
-        logger.warn("Last.FM username not set")
+        logger.warn("Last.FM username not set, not importing artists.")
        return

-    params = {
-        "method": "library.getartists",
-        "limit": 10000,
-        "api_key": API_KEY,
-        "user": headphones.LASTFM_USERNAME
-    }
+    logger.info("Fetching artists from Last.FM for username: %s", headphones.LASTFM_USERNAME)
+    data = request_lastfm("library.getartists", limit=10000, user=headphones.LASTFM_USERNAME)

-    url = 'http://ws.audioscrobbler.com/2.0/'
-    dom = request_minidom(url, timeout=20, params=params)
+    if data and "artists" in data:
+        artistlist = []
+        artists = data["artists"]["artist"]
+        logger.debug("Fetched %d artists from Last.FM", len(artists))

-    if not dom:
-        logger.debug("Could not parse artist list from Last.FM")
-        return
+        for artist in artists:
+            artist_mbid = artist["mbid"]

-    artists = dom.getElementsByTagName("artist")
-    logger.debug("Fetched %d artists from Last.FM", len(artists))
-
-    artistlist = []
-
-    for artist in artists:
-        mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
-
-        for node in mbidnode:
-            artist_mbid = node.data
-
-        try:
            if not any(artist_mbid in x for x in results):
                artistlist.append(artist_mbid)
-        except Exception:
-            logger.exception("Unhandled exception")
-            continue

-    from headphones import importer
+        from headphones import importer

-    for artistid in artistlist:
-        importer.addArtisttoDB(artistid)
+        for artistid in artistlist:
+            importer.addArtisttoDB(artistid)

-    logger.info("Imported %d new artists from Last.FM", len(artistid))
+        logger.info("Imported %d new artists from Last.FM", len(artistlist))

 def getTagTopArtists(tag, limit=50):
    myDB = db.DBConnection()
    results = myDB.select('SELECT ArtistID from artists')

-    params = {
-        "method": "tag.gettopartists",
-        "limit": limit,
-        "tag": tag,
-        "api_key": API_KEY
-    }
+    logger.info("Fetching top artists from Last.FM for tag: %s", tag)
+    data = request_lastfm("tag.gettopartists", limit=limit, tag=tag)

-    url = 'http://ws.audioscrobbler.com/2.0/'
-    dom = request_minidom(url, timeout=20, params=param)
+    if data and "topartists" in data:
+        artistlist = []
+        artists = data["topartists"]["artist"]
+        logger.debug("Fetched %d artists from Last.FM", len(artists))

-    if not dom:
-        logger.debug("Could not parse artist list from Last.FM")
-        return
+        for artist in artists:
+            artist_mbid = artist["mbid"]

-    artists = d.getElementsByTagName("artist")
-    logger.debug("Fetched %d artists from Last.FM", len(artists))
-
-    artistlist = []
-
-    for artist in artists:
-        mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
-
-        for node in mbidnode:
-            artist_mbid = node.data
-
-        try:
            if not any(artist_mbid in x for x in results):
                artistlist.append(artist_mbid)
-        except Exception:
-            logger.exception("Unhandled exception")
-            continue

-    from headphones import importer
+        from headphones import importer

-    for artistid in artistlist:
-        importer.addArtisttoDB(artistid)
+        for artistid in artistlist:
+            importer.addArtisttoDB(artistid)

-def getAlbumDescription(rgid, artist, album):
-    myDB = db.DBConnection()
-    result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid])
-
-    if result:
-        logger.info("No summary found for release group id: %s", rgid)
-        return
-
-    params = {
-        "method": 'album.getInfo',
-        "api_key": api_key,
-        "artist": artist.encode('utf-8'),
-        "album": album.encode('utf-8')
-    }
-
-    url = 'http://ws.audioscrobbler.com/2.0/'
-    dom = helpers.request_minidom(url, timeout=20, params=params)
-
-    if not dom:
-        logger.debug("Could not parse album description from Last.FM")
-        return
-
-    if dom.getElementsByTagName("error"):
-        logger.debug("Last.FM returned error")
-        return
-
-    albuminfo = dom.getElementsByTagName("album")
-    logger.debug("Fetched %d albums from Last.FM", len(artists))
-
-    for item in albuminfo:
-        try:
-            summarynode = item.getElementsByTagName("summary")[0].childNodes
-            contentnode = item.getElementsByTagName("content")[0].childNodes
-
-            for node in summarynode:
-                summary = node.data
-            for node in contentnode:
-                content = node.data
-
-            controlValueDict = {'ReleaseGroupID': rgid}
-            newValueDict = {'Summary': summary,
-                            'Content': content}
-            myDB.upsert("descriptions", newValueDict, controlValueDict)
-        except:
-            logger.exception("Unhandled exception")
-            return
+        logger.debug("Added %d new artists from Last.FM", len(artistlist))