Replaces urllib2 in importer, lastfm and lyrics

This commit is contained in:
Bas Stottelaar
2014-04-06 16:15:33 +02:00
parent a717594950
commit 55796324b6
3 changed files with 107 additions and 156 deletions

View File

@@ -20,7 +20,7 @@ import os
from beets.mediafile import MediaFile
import headphones
from headphones import logger, helpers, db, mb, albumart, lastfm
from headphones import logger, helpers, db, mb, lastfm
blacklisted_special_artist_names = ['[anonymous]','[data]','[no artist]','[traditional]','[unknown]','Various Artists']
blacklisted_special_artists = ['f731ccc4-e22a-43af-a747-64213329e088','33cf029c-63b0-41a0-9855-be2a3665fb3b',\

View File

@@ -13,138 +13,138 @@
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import urllib, urllib2
from xml.dom import minidom
from collections import defaultdict
import random
import time
import headphones
from headphones import db, logger
api_key = '395e6ec6bb557382fc41fde867bce66f'
from headphones import db, logger, helpers
from collections import defaultdict
API_KEY = '395e6ec6bb557382fc41fde867bce66f'
def _first_tag_text(element, tag):
    """
    Return the text of the first ``tag`` descendant of ``element``.

    Returns None when there is no such descendant or when it has no child
    nodes, so callers can skip incomplete entries instead of reusing a value
    left over from a previous loop iteration.
    """
    matches = element.getElementsByTagName(tag)
    if not matches:
        return None

    text = None
    for node in matches[0].childNodes:
        # Mirrors the original loop: the last child node wins.
        text = node.data
    return text


def getSimilar():
    """
    Rebuild the ``lastfmcloud`` table with artists similar to the library.

    Calls Last.FM's ``artist.getsimilar`` for the first 12 rows of the
    ``artists`` table (ordered by ``HaveTracks`` descending), counts how often
    each suggested artist that is not already in the library appears, keeps
    the 25 most frequently suggested ones in shuffled order, and replaces the
    contents of ``lastfmcloud`` with them.

    Artists for which no DOM could be fetched are skipped. Returns None.
    """
    myDB = db.DBConnection()
    results = myDB.select('SELECT ArtistID from artists ORDER BY HaveTracks DESC')

    # Build the set of library IDs once; the membership test below is then a
    # constant-time lookup instead of a scan over every row per suggestion.
    known_ids = set(row['ArtistID'] for row in results)

    url = 'http://ws.audioscrobbler.com/2.0/'
    artistlist = []

    for result in results[:12]:
        params = {
            "method": "artist.getsimilar",
            "mbid": result['ArtistID'],
            "api_key": API_KEY
        }

        # request_minidom lives in the helpers module; calling it unqualified
        # raises NameError because only `helpers` itself is imported.
        dom = helpers.request_minidom(url, timeout=20, params=params)

        if not dom:
            logger.debug("Could not parse similar artist data from Last.FM")
            continue

        artists = dom.getElementsByTagName("artist")
        logger.debug("Fetched %d artists from Last.FM", len(artists))

        for artist in artists:
            artist_name = _first_tag_text(artist, "name")
            artist_mbid = _first_tag_text(artist, "mbid")

            # An entry without a name or MBID cannot be stored meaningfully.
            if not artist_name or not artist_mbid:
                continue

            if artist_mbid not in known_ids:
                artistlist.append((artist_name, artist_mbid))

    # Tally how many library artists each suggestion is similar to.
    occurrences = defaultdict(int)
    for artist_name, artist_mbid in artistlist:
        occurrences[artist_name, artist_mbid] += 1

    top_list = sorted(occurrences.items(), key=lambda item: item[1], reverse=True)[:25]
    random.shuffle(top_list)

    myDB.action('''DELETE from lastfmcloud''')
    for (artist_name, artist_mbid), hits in top_list:
        myDB.action('INSERT INTO lastfmcloud VALUES( ?, ?, ?)', [artist_name, artist_mbid, hits])
def getArtists():
    """
    Import the artists of the configured Last.FM user into the library.

    Reads the user name from ``headphones.LASTFM_USERNAME``, fetches up to
    10000 artists via Last.FM's ``library.getartists`` and passes every MBID
    that is not yet in the ``artists`` table to ``importer.addArtisttoDB``.

    Returns None. Nothing is imported when no user name is configured or when
    no DOM could be fetched.
    """
    myDB = db.DBConnection()
    results = myDB.select('SELECT ArtistID from artists')

    username = headphones.LASTFM_USERNAME
    if not username:
        logger.warn("Last.FM username not set")
        return

    logger.info("Starting Last.FM artists import with username '%s'", username)

    params = {
        "method": "library.getartists",
        "limit": 10000,
        "api_key": API_KEY,
        "user": username
    }

    url = 'http://ws.audioscrobbler.com/2.0/'
    # request_minidom lives in the helpers module; calling it unqualified
    # raises NameError because only `helpers` itself is imported.
    dom = helpers.request_minidom(url, timeout=20, params=params)

    if not dom:
        logger.debug("Could not parse artist list from Last.FM")
        return

    artists = dom.getElementsByTagName("artist")
    logger.debug("Fetched %d artists from Last.FM", len(artists))

    # Build the set of library IDs once instead of scanning all rows for
    # every fetched artist.
    known_ids = set(row['ArtistID'] for row in results)
    artistlist = []

    for artist in artists:
        mbid_elements = artist.getElementsByTagName("mbid")
        if not mbid_elements:
            continue

        # Reset per artist: an <mbid> element without child nodes must not
        # inherit the MBID of the previously processed artist.
        artist_mbid = None
        for node in mbid_elements[0].childNodes:
            artist_mbid = node.data

        if artist_mbid and artist_mbid not in known_ids:
            artistlist.append(artist_mbid)

    # Imported here rather than at module level, as in the original code.
    from headphones import importer

    for artistid in artistlist:
        importer.addArtisttoDB(artistid)

    # Report the number of imported artists; the previous len(artistid)
    # measured the last MBID string and failed when the list was empty.
    logger.info("Imported %d new artists from Last.FM", len(artistlist))
# getTagTopArtists(tag, limit=50): requests Last.FM's "tag.gettopartists"
# for `tag` and passes each collected artist MBID to importer.addArtisttoDB.
# NOTE(review): this span appears to be a diff view in which removed and
# added lines are interleaved without +/- markers, and two hunk headers
# ("@@ ... @@") elide part of the body; read it as a diff, not as runnable
# code.
def getTagTopArtists(tag, limit=50):
myDB = db.DBConnection()
results = myDB.select('SELECT ArtistID from artists')
# The next two lines use urllib2 and the lower-case `api_key`; presumably
# they are the removed side of the diff - confirm.
url = 'http://ws.audioscrobbler.com/2.0/?method=tag.gettopartists&limit=%s&tag=%s&api_key=%s' % (limit, tag, api_key)
data = urllib2.urlopen(url, timeout=20).read()
params = {
"method": "tag.gettopartists",
"limit": limit,
"tag": tag,
"api_key": API_KEY
}
try:
d = minidom.parseString(data)
except:
logger.error("Could not parse artist list from Last.FM data")
url = 'http://ws.audioscrobbler.com/2.0/'
# NOTE(review): `param` is never assigned in this function; the dict built
# above is named `params`, so this call raises NameError as written.
# NOTE(review): `request_minidom` is called unqualified, while
# getAlbumDescription calls `helpers.request_minidom` and only the `helpers`
# module is imported - likely a second NameError unless it is defined
# elsewhere in the file.
dom = request_minidom(url, timeout=20, params=param)
if not dom:
logger.debug("Could not parse artist list from Last.FM")
return
# NOTE(review): `d` is only assigned by the minidom.parseString line above;
# if that line is on the removed side of the diff, this should presumably
# read `dom.getElementsByTagName("artist")` - confirm.
artists = d.getElementsByTagName("artist")
logger.debug("Fetched %d artists from Last.FM", len(artists))
artistlist = []
@@ -157,7 +157,8 @@ def getTagTopArtists(tag, limit=50):
try:
# Only collect MBIDs that do not occur in any row of the `artists` query.
if not any(artist_mbid in x for x in results):
artistlist.append(artist_mbid)
except:
except Exception:
logger.exception("Unhandled exception")
continue
from headphones import importer
@@ -165,89 +166,49 @@ def getTagTopArtists(tag, limit=50):
for artistid in artistlist:
importer.addArtisttoDB(artistid)
def getAlbumDescription(rgid, artist, album):
    """
    Fetch an album's summary and content from Last.FM and store them.

    Does nothing when the ``descriptions`` table already has a row for
    ``rgid``. Otherwise calls Last.FM's ``album.getInfo`` with the UTF-8
    encoded ``artist`` and ``album`` names and upserts the ``Summary`` and
    ``Content`` of the first usable ``<album>`` element under
    ``ReleaseGroupID`` = ``rgid``.

    Returns None in every case; failures are logged rather than raised.
    """
    myDB = db.DBConnection()
    result = myDB.select('SELECT Summary from descriptions WHERE ReleaseGroupID=?', [rgid])

    if result:
        # A row exists, so there is nothing to fetch. The previous message
        # ("No summary found") stated the opposite of this condition.
        logger.info("Summary already stored for release group id: %s", rgid)
        return

    params = {
        "method": 'album.getInfo',
        # The module constant is API_KEY; the lower-case `api_key` no longer
        # exists and raised NameError.
        "api_key": API_KEY,
        "artist": artist.encode('utf-8'),
        "album": album.encode('utf-8')
    }

    url = 'http://ws.audioscrobbler.com/2.0/'
    dom = helpers.request_minidom(url, timeout=20, params=params)

    if not dom:
        logger.debug("Could not parse album description from Last.FM")
        return

    if dom.getElementsByTagName("error"):
        logger.debug("Last.FM returned error")
        return

    albuminfo = dom.getElementsByTagName("album")
    # Count the albums just fetched; `artists` is not defined in this function.
    logger.debug("Fetched %d albums from Last.FM", len(albuminfo))

    for item in albuminfo:
        try:
            summarynode = item.getElementsByTagName("summary")[0].childNodes
            contentnode = item.getElementsByTagName("content")[0].childNodes

            # Reset per album so an empty element is detected instead of
            # leaving the names unbound or carrying over an earlier value.
            summary = None
            content = None

            for node in summarynode:
                summary = node.data
            for node in contentnode:
                content = node.data

            if summary is None or content is None:
                continue

            controlValueDict = {'ReleaseGroupID': rgid}
            newValueDict = {'Summary': summary,
                            'Content': content}
            myDB.upsert("descriptions", newValueDict, controlValueDict)
            # One description per release group is enough.
            break
        except Exception:
            # Best effort: log the problem and try the next <album> element.
            logger.exception("Unhandled exception")
            continue

View File

@@ -14,11 +14,9 @@
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import re
import urllib, urllib2
from xml.dom import minidom
import htmlentitydefs
from headphones import logger
from headphones import logger, helpers
def getLyrics(artist, song):
@@ -26,22 +24,14 @@ def getLyrics(artist, song):
"song": song.encode('utf-8'),
"fmt": 'xml'
}
searchURL = 'http://lyrics.wikia.com/api.php?' + urllib.urlencode(params)
url = 'http://lyrics.wikia.com/api.php'
data = helpers.request_minidom(url, params)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except Exception, e:
logger.warn('Error opening: %s. Error: %s' % (searchURL, e))
if not data:
return
try:
parseddata = minidom.parseString(data)
except Exception, e:
logger.warn('Error parsing data from url: %s. Error: %s' % (searchURL, e))
return
url = parseddata.getElementsByTagName("url")
url = data.getElementsByTagName("url")
if url:
lyricsurl = url[0].firstChild.nodeValue
@@ -49,12 +39,12 @@ def getLyrics(artist, song):
logger.info('No lyrics found for %s - %s' % (artist, song))
return
try:
lyricspage = urllib.urlopen(lyricsurl).read()
except Exception, e:
lyricspage = helpers.request_content(lyricsurl)
if not lyricspage:
logger.warn('Error fetching lyrics from: %s. Error: %s' % (lyricsurl, e))
return
m = re.compile('''<div class='lyricbox'><div class='rtMatcher'>.*?</div>(.*?)<!--''').search(lyricspage)
if not m: