diff --git a/headphones/importer.py b/headphones/importer.py index 3b4179af..d14466b5 100644 --- a/headphones/importer.py +++ b/headphones/importer.py @@ -20,7 +20,7 @@ import os from beets.mediafile import MediaFile import headphones -from headphones import logger, helpers, db, mb, albumart, lastfm +from headphones import logger, helpers, db, mb, lastfm blacklisted_special_artist_names = ['[anonymous]','[data]','[no artist]','[traditional]','[unknown]','Various Artists'] blacklisted_special_artists = ['f731ccc4-e22a-43af-a747-64213329e088','33cf029c-63b0-41a0-9855-be2a3665fb3b',\ diff --git a/headphones/lastfm.py b/headphones/lastfm.py index dcb6207b..71ded140 100644 --- a/headphones/lastfm.py +++ b/headphones/lastfm.py @@ -13,138 +13,138 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -import urllib, urllib2 -from xml.dom import minidom -from collections import defaultdict import random import time - import headphones -from headphones import db, logger -api_key = '395e6ec6bb557382fc41fde867bce66f' +from headphones import db, logger, helpers + +from collections import defaultdict + +API_KEY = '395e6ec6bb557382fc41fde867bce66f' - def getSimilar(): - myDB = db.DBConnection() results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC') - + artistlist = [] - + for result in results[:12]: - - url = 'http://ws.audioscrobbler.com/2.0/?method=artist.getsimilar&mbid=%s&api_key=%s' % (result['ArtistID'], api_key) - - try: - data = urllib2.urlopen(url, timeout=20).read() - except: - time.sleep(1) + params = { + "method": "artist.getsimilar", + "mbid": result['ArtistID'], + "api_key": API_KEY + } + + url = 'http://ws.audioscrobbler.com/2.0/' + dom = request_minidom(url, timeout=20, params=params) + + if not dom: + logger.debug("Could not parse similar artist data from Last.FM") continue - - if not data or len(data) < 200: - continue - - try: - d = minidom.parseString(data) - except: - logger.debug("Could not parse similar artist data from last.fm") - - node = d.documentElement - artists = d.getElementsByTagName("artist") - + + artists = dom.getElementsByTagName("artist") + logger.debug("Fetched %d artists from Last.FM", len(artists)) + for artist in artists: namenode = artist.getElementsByTagName("name")[0].childNodes mbidnode = artist.getElementsByTagName("mbid")[0].childNodes - + for node in namenode: artist_name = node.data for node in mbidnode: artist_mbid = node.data - + try: if not any(artist_mbid in x for x in results): artistlist.append((artist_name, artist_mbid)) - except: + except Exception: + logger.exception("Unhandled exception") continue - + count = defaultdict(int) - + for artist, mbid in artistlist: count[artist, mbid] += 1 - + items = count.items() - top_list = sorted(items, key=lambda x: x[1], reverse=True)[:25] - + random.shuffle(top_list) - + myDB.action('''DELETE from lastfmcloud''') for tuple in top_list: artist_name, artist_mbid = tuple[0] count = tuple[1] myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, count]) - -def getArtists(): +def getArtists(): myDB = db.DBConnection() results = myDB.select('SELECT ArtistID from artists') if not headphones.LASTFM_USERNAME: + logger.warn("Last.FM username not set") return - - else: - username = headphones.LASTFM_USERNAME - logger.info("Starting Last.FM artists import with username '%s'", username) + params = { + "method": "library.getartists", + "limit": 10000, + "api_key": API_KEY, + "user": headphones.LASTFM_USERNAME + } - url = 'http://ws.audioscrobbler.com/2.0/?method=library.getartists&limit=10000&api_key=%s&user=%s' % (api_key, username) - data = urllib2.urlopen(url, timeout=20).read() - - try: - d = minidom.parseString(data) - except: - logger.error("Could not parse artist list from last.fm data") + url = 'http://ws.audioscrobbler.com/2.0/' + dom = request_minidom(url, timeout=20, params=params) + + if not dom: + logger.debug("Could not parse artist list from Last.FM") return - - artists = d.getElementsByTagName("artist") - logger.info("Fetched %d artists from Last.FM", len(artists)) - + + artists = dom.getElementsByTagName("artist") + logger.debug("Fetched %d artists from Last.FM", len(artists)) + artistlist = [] - + for artist in artists: mbidnode = artist.getElementsByTagName("mbid")[0].childNodes for node in mbidnode: artist_mbid = node.data - + try: if not any(artist_mbid in x for x in results): artistlist.append(artist_mbid) - except: + except Exception: + logger.exception("Unhandled exception") continue - + from headphones import importer - + for artistid in artistlist: importer.addArtisttoDB(artistid) logger.info("Imported %d new artists from Last.FM", len(artistid)) - + def getTagTopArtists(tag, limit=50): myDB = db.DBConnection() results = myDB.select('SELECT ArtistID from artists') - url = 'http://ws.audioscrobbler.com/2.0/?method=tag.gettopartists&limit=%s&tag=%s&api_key=%s' % (limit, tag, api_key) - data = urllib2.urlopen(url, timeout=20).read() + params = { + "method": "tag.gettopartists", + "limit": limit, + "tag": tag, + "api_key": API_KEY + } - try: - d = minidom.parseString(data) - except: - logger.error("Could not parse artist list from Last.FM data") + url = 'http://ws.audioscrobbler.com/2.0/' + dom = request_minidom(url, timeout=20, params=param) + + if not dom: + logger.debug("Could not parse artist list from Last.FM") return artists = d.getElementsByTagName("artist") + logger.debug("Fetched %d artists from Last.FM", len(artists)) artistlist = [] @@ -157,7 +157,8 @@ def getTagTopArtists(tag, limit=50): try: if not any(artist_mbid in x for x in results): artistlist.append(artist_mbid) - except: + except Exception: + logger.exception("Unhandled exception") continue from headphones import importer @@ -165,89 +166,49 @@ def getTagTopArtists(tag, limit=50): for artistid in artistlist: importer.addArtisttoDB(artistid) - def getAlbumDescription(rgid, artist, album): - - myDB = db.DBConnection() + myDB = db.DBConnection() result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid]) - + if result: + logger.info("No summary found for release group id: %s", rgid) return - - params = { "method": 'album.getInfo', - "api_key": api_key, - "artist": artist.encode('utf-8'), - "album": album.encode('utf-8') - } - searchURL = 'http://ws.audioscrobbler.com/2.0/?' + urllib.urlencode(params) - data = urllib2.urlopen(searchURL, timeout=20).read() - - if data == 'Album not found': + params = { + "method": 'album.getInfo', + "api_key": api_key, + "artist": artist.encode('utf-8'), + "album": album.encode('utf-8') + } + + url = 'http://ws.audioscrobbler.com/2.0/' + dom = helpers.request_minidom(url, timeout=20, params=params) + + if not dom: + logger.debug("Could not parse album description from Last.FM") return - - try: - d = minidom.parseString(data) - albuminfo = d.getElementsByTagName("album") - - for item in albuminfo: + if dom.getElementsByTagName("error"): + logger.debug("Last.FM returned error") + return + + albuminfo = dom.getElementsByTagName("album") + logger.debug("Fetched %d albums from Last.FM", len(artists)) + + for item in albuminfo: + try: summarynode = item.getElementsByTagName("summary")[0].childNodes contentnode = item.getElementsByTagName("content")[0].childNodes + for node in summarynode: summary = node.data for node in contentnode: content = node.data - - controlValueDict = {'ReleaseGroupID': rgid} - newValueDict = {'Summary': summary, - 'Content': content} - myDB.upsert("descriptions", newValueDict, controlValueDict) - - except: - logger.exception("Unhandled exception") - return - -def getAlbumDescriptionOld(rgid, releaselist): - """ - This was a dumb way to do it - going to just use artist & album name but keeping this here - because I may use it to fetch and cache album art - """ - - myDB = db.DBConnection() - result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid]) - - if result: - return - - for release in releaselist: - - mbid = release['releaseid'] - url = 'http://ws.audioscrobbler.com/2.0/?method=album.getInfo&mbid=%s&api_key=%s' % (mbid, api_key) - data = urllib.urlopen(url).read() - - if data == 'Album not found': - continue - - try: - d = minidom.parseString(data) - - albuminfo = d.getElementsByTagName("album") - - for item in albuminfo: - summarynode = item.getElementsByTagName("summary")[0].childNodes - contentnode = item.getElementsByTagName("content")[0].childNodes - for node in summarynode: - summary = node.data - for node in contentnode: - content = node.data controlValueDict = {'ReleaseGroupID': rgid} - newValueDict = {'ReleaseID': mbid, - 'Summary': summary, + newValueDict = {'Summary': summary, 'Content': content} myDB.upsert("descriptions", newValueDict, controlValueDict) - break except: logger.exception("Unhandled exception") - continue \ No newline at end of file + return \ No newline at end of file diff --git a/headphones/lyrics.py b/headphones/lyrics.py index 6899359a..c51e224d 100644 --- a/headphones/lyrics.py +++ b/headphones/lyrics.py @@ -14,11 +14,9 @@ # along with Headphones. If not, see . import re -import urllib, urllib2 -from xml.dom import minidom import htmlentitydefs -from headphones import logger +from headphones import logger, helpers def getLyrics(artist, song): @@ -26,22 +24,14 @@ def getLyrics(artist, song): "song": song.encode('utf-8'), "fmt": 'xml' } - - searchURL = 'http://lyrics.wikia.com/api.php?' + urllib.urlencode(params) + + url = 'http://lyrics.wikia.com/api.php' + data = helpers.request_minidom(url, params) - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except Exception, e: - logger.warn('Error opening: %s. Error: %s' % (searchURL, e)) + if not data: return - try: - parseddata = minidom.parseString(data) - except Exception, e: - logger.warn('Error parsing data from url: %s. Error: %s' % (searchURL, e)) - return - - url = parseddata.getElementsByTagName("url") + url = data.getElementsByTagName("url") if url: lyricsurl = url[0].firstChild.nodeValue @@ -49,12 +39,12 @@ def getLyrics(artist, song): logger.info('No lyrics found for %s - %s' % (artist, song)) return - try: - lyricspage = urllib.urlopen(lyricsurl).read() - except Exception, e: + lyricspage = helpers.request_content(lyricsurl) + + if not lyricspage: logger.warn('Error fetching lyrics from: %s. Error: %s' % (lyricsurl, e)) return - + m = re.compile('''
.*?
(.*?)