Refactored Last.FM code

This commit is contained in:
Bas Stottelaar
2014-04-06 23:30:04 +02:00
parent 41cb84957e
commit 21e1c100e7
3 changed files with 90 additions and 186 deletions

View File

@@ -19,7 +19,7 @@ import glob, urllib
import lib.simplejson as simplejson
import headphones
from headphones import db, helpers, logger
from headphones import db, helpers, logger, lastfm
lastfm_apikey = "690e1ed3bc00bc91804cd8f7fe5ed6d4"
@@ -210,15 +210,7 @@ class Cache(object):
if ArtistID:
self.id_type = 'artist'
params = { "method": "artist.getInfo",
"api_key": lastfm_apikey,
"mbid": ArtistID,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/"
data = helpers.request_json(url, params=params, timeout=20)
data = lastfm.request_lastfm("artist.getinfo", mbid=ArtistID, api_key=lastfm_apikey)
if not data:
return
@@ -226,25 +218,17 @@ class Cache(object):
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found on url: %s', url)
logger.debug('No artist image found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found on url: %s', url)
logger.debug('No artist thumbnail image found')
else:
self.id_type = 'album'
params = { "method": "album.getInfo",
"api_key": lastfm_apikey,
"mbid": AlbumID,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
data = helpers.request_json(url, params=params, timeout=20)
data = lastfm.request_lastfm("album.getinfo", mbid=AlbumID, api_key=lastfm_apikey)
if not data:
return
@@ -252,13 +236,13 @@ class Cache(object):
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found on url: %s', url)
logger.debug('No artist image found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found on url: %s', url)
logger.debug('No artist thumbnail image found')
return {'artwork' : image_url, 'thumbnail' : thumb_url }
@@ -271,14 +255,7 @@ class Cache(object):
# Since lastfm uses release ids rather than release group ids for albums, we have to do an artist + album search for albums
if self.id_type == 'artist':
params = { "method": "artist.getInfo",
"api_key": lastfm_apikey,
"mbid": self.id,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/"
data = helpers.request_json(url, timeout=20, params=params)
data = lastfm.request_lastfm("artist.getinfo", mbid=self.id, api_key=lastfm_apikey)
if not data:
return
@@ -286,36 +263,27 @@ class Cache(object):
try:
self.info_summary = data['artist']['bio']['summary']
except Exception:
logger.debug('No artist bio summary found on url: %s', url)
logger.debug('No artist bio summary found')
self.info_summary = None
try:
self.info_content = data['artist']['bio']['content']
except Exception:
logger.debug('No artist bio found on url: %s', url)
logger.debug('No artist bio found')
self.info_content = None
try:
image_url = data['artist']['image'][-1]['#text']
except Exception:
logger.debug('No artist image found on url: %s', url)
logger.debug('No artist image found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No artist thumbnail image found on url: %s', url)
logger.debug('No artist thumbnail image found')
else:
dbartist = myDB.action('SELECT ArtistName, AlbumTitle FROM albums WHERE AlbumID=?', [self.id]).fetchone()
params = { "method": "album.getInfo",
"api_key": lastfm_apikey,
"artist": dbartist['ArtistName'].encode('utf-8'),
"album": dbartist['AlbumTitle'].encode('utf-8'),
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/"
data = helpers.request_json(url, timeout=20, params=params)
data = lastfm.request_lastfm("album.getinfo", artist=dbartist['ArtistName'], album=dbartist['AlbumTitle'], api_key=lastfm_apikey)
if not data:
return
@@ -323,23 +291,23 @@ class Cache(object):
try:
self.info_summary = data['album']['wiki']['summary']
except Exception:
logger.debug('No album summary found from: %s', url)
logger.debug('No album summary found')
self.info_summary = None
try:
self.info_content = data['album']['wiki']['content']
except Exception:
logger.debug('No album infomation found from: %s', url)
logger.debug('No album infomation found')
self.info_content = None
try:
image_url = data['album']['image'][-1]['#text']
except Exception:
logger.debug('No album image link found on url: %s', url)
logger.debug('No album image link found')
image_url = None
thumb_url = self._get_thumb_url(data)
if not thumb_url:
logger.debug('No album thumbnail image found on url: %s', url)
logger.debug('No album thumbnail image found')
#Save the content & summary to the database no matter what if we've opened up the url
if self.id_type == 'artist':

View File

@@ -646,10 +646,11 @@ def request_soup(url, **kwargs):
no exceptions are raised.
"""
parser = kwargs.pop("parser", "html5lib")
response = request_response(url, **kwargs)
if response is not None:
return BeautifulSoup(response.content, "html5lib")
return BeautifulSoup(response.content, parser)
def request_minidom(url, **kwargs):
"""

View File

@@ -21,47 +21,60 @@ from headphones import db, logger, helpers
from collections import defaultdict
ENTRY_POINT = 'http://ws.audioscrobbler.com/2.0/'
API_KEY = '395e6ec6bb557382fc41fde867bce66f'
def request_lastfm(method, **kwargs):
    """
    Call a Last.FM API method and return the decoded response.

    The request's "method" parameter is always set from `method`. The API key
    and the response format are only filled in when the caller did not supply
    them, so `api_key` and `format` can be overridden through keyword
    arguments. JSON is requested by default, since it is lighter than XML.
    Every other keyword argument is passed through as a request parameter.

    Returns whatever `helpers.request_json` produced, or None when the request
    yielded no data or the response contains an "error" key.
    """

    # Prepare request. Caller-supplied api_key/format win over the defaults.
    kwargs["method"] = method
    kwargs.setdefault("api_key", API_KEY)
    kwargs.setdefault("format", "json")

    # Send request
    logger.debug("Calling Last.FM method: %s", method)
    data = helpers.request_json(ENTRY_POINT, timeout=20, params=kwargs)

    # Parse response and check for errors.
    if not data:
        logger.error("Error calling Last.FM method: %s", method)
        return None

    if "error" in data:
        # Fall back to the error code so that a payload without a "message"
        # key is still reported instead of raising KeyError while logging.
        logger.debug("Last.FM returned an error: %s",
                     data.get("message", data["error"]))
        return None

    return data
def getSimilar():
myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC')
logger.info("Fetching similar artists from Last.FM for tag cloud")
artistlist = []
for result in results[:12]:
params = {
"method": "artist.getsimilar",
"mbid": result['ArtistID'],
"api_key": API_KEY
}
data = request_lastfm("artist.getsimilar", mbid=result['ArtistId'])
url = 'http://ws.audioscrobbler.com/2.0/'
dom = request_minidom(url, timeout=20, params=params)
if data and "similarartists" in data:
artists = data["similarartists"]["artist"]
if not dom:
logger.debug("Could not parse similar artist data from Last.FM")
continue
for artist in artists:
artist_mbid = artist["mbid"]
artist_name = artist["name"]
artists = dom.getElementsByTagName("artist")
logger.debug("Fetched %d artists from Last.FM", len(artists))
for artist in artists:
namenode = artist.getElementsByTagName("name")[0].childNodes
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in namenode:
artist_name = node.data
for node in mbidnode:
artist_mbid = node.data
try:
if not any(artist_mbid in x for x in results):
artistlist.append((artist_name, artist_mbid))
except Exception:
logger.exception("Unhandled exception")
continue
# Add new artists to tag cloud
logger.debug("Fetched %d artists from Last.FM", len(artistlist))
count = defaultdict(int)
for artist, mbid in artistlist:
@@ -72,143 +85,65 @@ def getSimilar():
random.shuffle(top_list)
myDB.action('''DELETE from lastfmcloud''')
for tuple in top_list:
artist_name, artist_mbid = tuple[0]
count = tuple[1]
myDB.action("DELETE from lastfmcloud")
for item in top_list:
artist_name, artist_mbid = item[0]
count = item[1]
myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count])
logger.debug("Inserted %d artists into Last.FM tag cloud", len(top_list))
def getArtists():
myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists')
if not headphones.LASTFM_USERNAME:
logger.warn("Last.FM username not set")
logger.warn("Last.FM username not set, not importing artists.")
return
params = {
"method": "library.getartists",
"limit": 10000,
"api_key": API_KEY,
"user": headphones.LASTFM_USERNAME
}
logger.info("Fetching artists from Last.FM for username: %s", headphones.LASTFM_USERNAME)
data = request_lastfm("library.getartists", limit=10000, user=headphones.LASTFM_USERNAME)
url = 'http://ws.audioscrobbler.com/2.0/'
dom = request_minidom(url, timeout=20, params=params)
if data and "artists" in data:
artistlist = []
artists = data["artists"]["artist"]
logger.debug("Fetched %d artists from Last.FM", len(artists))
if not dom:
logger.debug("Could not parse artist list from Last.FM")
return
for artist in artists:
artist_mbid = artist["mbid"]
artists = dom.getElementsByTagName("artist")
logger.debug("Fetched %d artists from Last.FM", len(artists))
artistlist = []
for artist in artists:
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in mbidnode:
artist_mbid = node.data
try:
if not any(artist_mbid in x for x in results):
artistlist.append(artist_mbid)
except Exception:
logger.exception("Unhandled exception")
continue
from headphones import importer
from headphones import importer
for artistid in artistlist:
importer.addArtisttoDB(artistid)
for artistid in artistlist:
importer.addArtisttoDB(artistid)
logger.info("Imported %d new artists from Last.FM", len(artistid))
logger.info("Imported %d new artists from Last.FM", len(artistlist))
def getTagTopArtists(tag, limit=50):
myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists')
params = {
"method": "tag.gettopartists",
"limit": limit,
"tag": tag,
"api_key": API_KEY
}
logger.info("Fetching top artists from Last.FM for tag: %s", tag)
data = request_lastfm("tag.gettopartists", limit=limit, tag=tag)
url = 'http://ws.audioscrobbler.com/2.0/'
dom = request_minidom(url, timeout=20, params=param)
if data and "topartists" in data:
artistlist = []
artists = data["topartists"]["artist"]
logger.debug("Fetched %d artists from Last.FM", len(artists))
if not dom:
logger.debug("Could not parse artist list from Last.FM")
return
for artist in artists:
artist_mbid = artist["mbid"]
artists = d.getElementsByTagName("artist")
logger.debug("Fetched %d artists from Last.FM", len(artists))
artistlist = []
for artist in artists:
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in mbidnode:
artist_mbid = node.data
try:
if not any(artist_mbid in x for x in results):
artistlist.append(artist_mbid)
except Exception:
logger.exception("Unhandled exception")
continue
from headphones import importer
from headphones import importer
for artistid in artistlist:
importer.addArtisttoDB(artistid)
for artistid in artistlist:
importer.addArtisttoDB(artistid)
def getAlbumDescription(rgid, artist, album):
myDB = db.DBConnection()
result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid])
if result:
logger.info("No summary found for release group id: %s", rgid)
return
params = {
"method": 'album.getInfo',
"api_key": api_key,
"artist": artist.encode('utf-8'),
"album": album.encode('utf-8')
}
url = 'http://ws.audioscrobbler.com/2.0/'
dom = helpers.request_minidom(url, timeout=20, params=params)
if not dom:
logger.debug("Could not parse album description from Last.FM")
return
if dom.getElementsByTagName("error"):
logger.debug("Last.FM returned error")
return
albuminfo = dom.getElementsByTagName("album")
logger.debug("Fetched %d albums from Last.FM", len(artists))
for item in albuminfo:
try:
summarynode = item.getElementsByTagName("summary")[0].childNodes
contentnode = item.getElementsByTagName("content")[0].childNodes
for node in summarynode:
summary = node.data
for node in contentnode:
content = node.data
controlValueDict = {'ReleaseGroupID': rgid}
newValueDict = {'Summary': summary,
'Content': content}
myDB.upsert("descriptions", newValueDict, controlValueDict)
except:
logger.exception("Unhandled exception")
return
logger.debug("Added %d new artists from Last.FM", len(artistlist))