Refactored Last.FM code

This commit is contained in:
Bas Stottelaar
2014-04-06 23:30:04 +02:00
parent 41cb84957e
commit 21e1c100e7
3 changed files with 90 additions and 186 deletions

View File

@@ -19,7 +19,7 @@ import glob, urllib
import lib.simplejson as simplejson import lib.simplejson as simplejson
import headphones import headphones
from headphones import db, helpers, logger from headphones import db, helpers, logger, lastfm
lastfm_apikey = "690e1ed3bc00bc91804cd8f7fe5ed6d4" lastfm_apikey = "690e1ed3bc00bc91804cd8f7fe5ed6d4"
@@ -210,15 +210,7 @@ class Cache(object):
if ArtistID: if ArtistID:
self.id_type = 'artist' self.id_type = 'artist'
data = lastfm.request_lastfm("artist.getinfo", mbid=ArtistID, api_key=lastfm_apikey)
params = { "method": "artist.getInfo",
"api_key": lastfm_apikey,
"mbid": ArtistID,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/"
data = helpers.request_json(url, params=params, timeout=20)
if not data: if not data:
return return
@@ -226,25 +218,17 @@ class Cache(object):
try: try:
image_url = data['artist']['image'][-1]['#text'] image_url = data['artist']['image'][-1]['#text']
except Exception: except Exception:
logger.debug('No artist image found on url: %s', url) logger.debug('No artist image found')
image_url = None image_url = None
thumb_url = self._get_thumb_url(data) thumb_url = self._get_thumb_url(data)
if not thumb_url: if not thumb_url:
logger.debug('No artist thumbnail image found on url: %s', url) logger.debug('No artist thumbnail image found')
else: else:
self.id_type = 'album' self.id_type = 'album'
data = lastfm.request_lastfm("album.getinfo", mbid=AlbumID, api_key=lastfm_apikey)
params = { "method": "album.getInfo",
"api_key": lastfm_apikey,
"mbid": AlbumID,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/?" + urllib.urlencode(params)
data = helpers.request_json(url, params=params, timeout=20)
if not data: if not data:
return return
@@ -252,13 +236,13 @@ class Cache(object):
try: try:
image_url = data['artist']['image'][-1]['#text'] image_url = data['artist']['image'][-1]['#text']
except Exception: except Exception:
logger.debug('No artist image found on url: %s', url) logger.debug('No artist image found')
image_url = None image_url = None
thumb_url = self._get_thumb_url(data) thumb_url = self._get_thumb_url(data)
if not thumb_url: if not thumb_url:
logger.debug('No artist thumbnail image found on url: %s', url) logger.debug('No artist thumbnail image found')
return {'artwork' : image_url, 'thumbnail' : thumb_url } return {'artwork' : image_url, 'thumbnail' : thumb_url }
@@ -271,14 +255,7 @@ class Cache(object):
# Since lastfm uses release ids rather than release group ids for albums, we have to do a artist + album search for albums # Since lastfm uses release ids rather than release group ids for albums, we have to do a artist + album search for albums
if self.id_type == 'artist': if self.id_type == 'artist':
params = { "method": "artist.getInfo", data = lastfm.request_lastfm("artist.getinfo", mbid=self.id, api_key=lastfm_apikey)
"api_key": lastfm_apikey,
"mbid": self.id,
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/"
data = helpers.request_json(url, timeout=20, params=params)
if not data: if not data:
return return
@@ -286,36 +263,27 @@ class Cache(object):
try: try:
self.info_summary = data['artist']['bio']['summary'] self.info_summary = data['artist']['bio']['summary']
except Exception: except Exception:
logger.debug('No artist bio summary found on url: %s', url) logger.debug('No artist bio summary found')
self.info_summary = None self.info_summary = None
try: try:
self.info_content = data['artist']['bio']['content'] self.info_content = data['artist']['bio']['content']
except Exception: except Exception:
logger.debug('No artist bio found on url: %s', url) logger.debug('No artist bio found')
self.info_content = None self.info_content = None
try: try:
image_url = data['artist']['image'][-1]['#text'] image_url = data['artist']['image'][-1]['#text']
except Exception: except Exception:
logger.debug('No artist image found on url: %s', url) logger.debug('No artist image found')
image_url = None image_url = None
thumb_url = self._get_thumb_url(data) thumb_url = self._get_thumb_url(data)
if not thumb_url: if not thumb_url:
logger.debug('No artist thumbnail image found on url: %s', url) logger.debug('No artist thumbnail image found')
else: else:
dbartist = myDB.action('SELECT ArtistName, AlbumTitle FROM albums WHERE AlbumID=?', [self.id]).fetchone() dbartist = myDB.action('SELECT ArtistName, AlbumTitle FROM albums WHERE AlbumID=?', [self.id]).fetchone()
data = lastfm.request_lastfm("album.getinfo", artist=dbartist['ArtistName'], album=dbartist['AlbumTitle'], api_key=lastfm_apikey)
params = { "method": "album.getInfo",
"api_key": lastfm_apikey,
"artist": dbartist['ArtistName'].encode('utf-8'),
"album": dbartist['AlbumTitle'].encode('utf-8'),
"format": "json"
}
url = "http://ws.audioscrobbler.com/2.0/"
data = helpers.request_json(url, timeout=20, params=params)
if not data: if not data:
return return
@@ -323,23 +291,23 @@ class Cache(object):
try: try:
self.info_summary = data['album']['wiki']['summary'] self.info_summary = data['album']['wiki']['summary']
except Exception: except Exception:
logger.debug('No album summary found from: %s', url) logger.debug('No album summary found')
self.info_summary = None self.info_summary = None
try: try:
self.info_content = data['album']['wiki']['content'] self.info_content = data['album']['wiki']['content']
except Exception: except Exception:
logger.debug('No album infomation found from: %s', url) logger.debug('No album infomation found')
self.info_content = None self.info_content = None
try: try:
image_url = data['album']['image'][-1]['#text'] image_url = data['album']['image'][-1]['#text']
except Exception: except Exception:
logger.debug('No album image link found on url: %s', url) logger.debug('No album image link found')
image_url = None image_url = None
thumb_url = self._get_thumb_url(data) thumb_url = self._get_thumb_url(data)
if not thumb_url: if not thumb_url:
logger.debug('No album thumbnail image found on url: %s', url) logger.debug('No album thumbnail image found')
#Save the content & summary to the database no matter what if we've opened up the url #Save the content & summary to the database no matter what if we've opened up the url
if self.id_type == 'artist': if self.id_type == 'artist':

View File

@@ -646,10 +646,11 @@ def request_soup(url, **kwargs):
no exceptions are raised. no exceptions are raised.
""" """
parser = kwargs.pop("parser", "html5lib")
response = request_response(url, **kwargs) response = request_response(url, **kwargs)
if response is not None: if response is not None:
return BeautifulSoup(response.content, "html5lib") return BeautifulSoup(response.content, parser)
def request_minidom(url, **kwargs): def request_minidom(url, **kwargs):
""" """

View File

@@ -21,47 +21,60 @@ from headphones import db, logger, helpers
from collections import defaultdict from collections import defaultdict
ENTRY_POINT = 'http://ws.audioscrobbler.com/2.0/'
API_KEY = '395e6ec6bb557382fc41fde867bce66f' API_KEY = '395e6ec6bb557382fc41fde867bce66f'
def request_lastfm(method, **kwargs):
    """
    Call a Last.FM API method. Automatically sets the method and API key.
    Returns the response data if no error occurred.

    By default, this method will request the JSON format, since it is
    lighter than XML. Both the API key and the format can be overridden by
    passing `api_key` or `format` as keyword arguments; every other keyword
    argument is sent along as a request parameter.

    Returns None when the request yielded no data or when the response
    contains an "error" entry.
    """

    # Prepare request. The method always wins; API key and format are only
    # filled in when the caller did not supply them.
    kwargs["method"] = method
    kwargs.setdefault("api_key", API_KEY)
    kwargs.setdefault("format", "json")

    # Send request
    logger.debug("Calling Last.FM method: %s", method)
    data = helpers.request_json(ENTRY_POINT, timeout=20, params=kwargs)

    # Parse response and check for errors.
    if not data:
        logger.error("Error calling Last.FM method: %s", method)
        return

    if "error" in data:
        # Fall back to the error code when no "message" is present, so that
        # reporting the error can never raise a KeyError itself.
        logger.debug("Last.FM returned an error: %s",
                     data.get("message", data["error"]))
        return

    return data
def getSimilar(): def getSimilar():
myDB = db.DBConnection() myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC') results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC')
logger.info("Fetching similar artists from Last.FM for tag cloud")
artistlist = [] artistlist = []
for result in results[:12]: for result in results[:12]:
params = { data = request_lastfm("artist.getsimilar", mbid=result['ArtistId'])
"method": "artist.getsimilar",
"mbid": result['ArtistID'],
"api_key": API_KEY
}
url = 'http://ws.audioscrobbler.com/2.0/' if data and "similarartists" in data:
dom = request_minidom(url, timeout=20, params=params) artists = data["similarartists"]["artist"]
if not dom: for artist in artists:
logger.debug("Could not parse similar artist data from Last.FM") artist_mbid = artist["mbid"]
continue artist_name = artist["name"]
artists = dom.getElementsByTagName("artist")
logger.debug("Fetched %d artists from Last.FM", len(artists))
for artist in artists:
namenode = artist.getElementsByTagName("name")[0].childNodes
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in namenode:
artist_name = node.data
for node in mbidnode:
artist_mbid = node.data
try:
if not any(artist_mbid in x for x in results): if not any(artist_mbid in x for x in results):
artistlist.append((artist_name, artist_mbid)) artistlist.append((artist_name, artist_mbid))
except Exception:
logger.exception("Unhandled exception")
continue
# Add new artists to tag cloud
logger.debug("Fetched %d artists from Last.FM", len(artistlist))
count = defaultdict(int) count = defaultdict(int)
for artist, mbid in artistlist: for artist, mbid in artistlist:
@@ -72,143 +85,65 @@ def getSimilar():
random.shuffle(top_list) random.shuffle(top_list)
myDB.action('''DELETE from lastfmcloud''') myDB.action("DELETE from lastfmcloud")
for tuple in top_list: for item in top_list:
artist_name, artist_mbid = tuple[0] artist_name, artist_mbid = item[0]
count = tuple[1] count = item[1]
myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count]) myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count])
logger.debug("Inserted %d artists into Last.FM tag cloud", len(top_list))
def getArtists(): def getArtists():
myDB = db.DBConnection() myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists') results = myDB.select('SELECT ArtistID from artists')
if not headphones.LASTFM_USERNAME: if not headphones.LASTFM_USERNAME:
logger.warn("Last.FM username not set") logger.warn("Last.FM username not set, not importing artists.")
return return
params = { logger.info("Fetching artists from Last.FM for username: %s", headphones.LASTFM_USERNAME)
"method": "library.getartists", data = request_lastfm("library.getartists", limit=10000, user=headphones.LASTFM_USERNAME)
"limit": 10000,
"api_key": API_KEY,
"user": headphones.LASTFM_USERNAME
}
url = 'http://ws.audioscrobbler.com/2.0/' if data and "artists" in data:
dom = request_minidom(url, timeout=20, params=params) artistlist = []
artists = data["artists"]["artist"]
logger.debug("Fetched %d artists from Last.FM", len(artists))
if not dom: for artist in artists:
logger.debug("Could not parse artist list from Last.FM") artist_mbid = artist["mbid"]
return
artists = dom.getElementsByTagName("artist")
logger.debug("Fetched %d artists from Last.FM", len(artists))
artistlist = []
for artist in artists:
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in mbidnode:
artist_mbid = node.data
try:
if not any(artist_mbid in x for x in results): if not any(artist_mbid in x for x in results):
artistlist.append(artist_mbid) artistlist.append(artist_mbid)
except Exception:
logger.exception("Unhandled exception")
continue
from headphones import importer from headphones import importer
for artistid in artistlist: for artistid in artistlist:
importer.addArtisttoDB(artistid) importer.addArtisttoDB(artistid)
logger.info("Imported %d new artists from Last.FM", len(artistid)) logger.info("Imported %d new artists from Last.FM", len(artistlist))
def getTagTopArtists(tag, limit=50): def getTagTopArtists(tag, limit=50):
myDB = db.DBConnection() myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists') results = myDB.select('SELECT ArtistID from artists')
params = { logger.info("Fetching top artists from Last.FM for tag: %s", tag)
"method": "tag.gettopartists", data = request_lastfm("tag.gettopartists", limit=limit, tag=tag)
"limit": limit,
"tag": tag,
"api_key": API_KEY
}
url = 'http://ws.audioscrobbler.com/2.0/' if data and "topartists" in data:
dom = request_minidom(url, timeout=20, params=param) artistlist = []
artists = data["topartists"]["artist"]
logger.debug("Fetched %d artists from Last.FM", len(artists))
if not dom: for artist in artists:
logger.debug("Could not parse artist list from Last.FM") artist_mbid = artist["mbid"]
return
artists = d.getElementsByTagName("artist")
logger.debug("Fetched %d artists from Last.FM", len(artists))
artistlist = []
for artist in artists:
mbidnode = artist.getElementsByTagName("mbid")[0].childNodes
for node in mbidnode:
artist_mbid = node.data
try:
if not any(artist_mbid in x for x in results): if not any(artist_mbid in x for x in results):
artistlist.append(artist_mbid) artistlist.append(artist_mbid)
except Exception:
logger.exception("Unhandled exception")
continue
from headphones import importer from headphones import importer
for artistid in artistlist: for artistid in artistlist:
importer.addArtisttoDB(artistid) importer.addArtisttoDB(artistid)
def getAlbumDescription(rgid, artist, album): logger.debug("Added %d new artists from Last.FM", len(artistlist))
myDB = db.DBConnection()
result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid])
if result:
logger.info("No summary found for release group id: %s", rgid)
return
params = {
"method": 'album.getInfo',
"api_key": api_key,
"artist": artist.encode('utf-8'),
"album": album.encode('utf-8')
}
url = 'http://ws.audioscrobbler.com/2.0/'
dom = helpers.request_minidom(url, timeout=20, params=params)
if not dom:
logger.debug("Could not parse album description from Last.FM")
return
if dom.getElementsByTagName("error"):
logger.debug("Last.FM returned error")
return
albuminfo = dom.getElementsByTagName("album")
logger.debug("Fetched %d albums from Last.FM", len(artists))
for item in albuminfo:
try:
summarynode = item.getElementsByTagName("summary")[0].childNodes
contentnode = item.getElementsByTagName("content")[0].childNodes
for node in summarynode:
summary = node.data
for node in contentnode:
content = node.data
controlValueDict = {'ReleaseGroupID': rgid}
newValueDict = {'Summary': summary,
'Content': content}
myDB.upsert("descriptions", newValueDict, controlValueDict)
except:
logger.exception("Unhandled exception")
return