From 867e5024482421d153b859efe656994befc6c333 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Wed, 1 Jul 2015 02:06:27 -0700 Subject: [PATCH] Cache metacritic scores in the artists table so we don't lose them if we can't connect for whatever reason --- headphones/__init__.py | 7 +++++- headphones/importer.py | 2 +- headphones/metacritic.py | 48 +++++++++++++++++++++++++++++++--------- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/headphones/__init__.py b/headphones/__init__.py index ef2d4283..52630d5f 100644 --- a/headphones/__init__.py +++ b/headphones/__init__.py @@ -353,7 +353,7 @@ def dbcheck(): conn = sqlite3.connect(DB_FILE) c = conn.cursor() c.execute( - 'CREATE TABLE IF NOT EXISTS artists (ArtistID TEXT UNIQUE, ArtistName TEXT, ArtistSortName TEXT, DateAdded TEXT, Status TEXT, IncludeExtras INTEGER, LatestAlbum TEXT, ReleaseDate TEXT, AlbumID TEXT, HaveTracks INTEGER, TotalTracks INTEGER, LastUpdated TEXT, ArtworkURL TEXT, ThumbURL TEXT, Extras TEXT, Type TEXT)') + 'CREATE TABLE IF NOT EXISTS artists (ArtistID TEXT UNIQUE, ArtistName TEXT, ArtistSortName TEXT, DateAdded TEXT, Status TEXT, IncludeExtras INTEGER, LatestAlbum TEXT, ReleaseDate TEXT, AlbumID TEXT, HaveTracks INTEGER, TotalTracks INTEGER, LastUpdated TEXT, ArtworkURL TEXT, ThumbURL TEXT, Extras TEXT, Type TEXT, MetaCritic TEXT)') # ReleaseFormat here means CD,Digital,Vinyl, etc. 
If using the default # Headphones hybrid release, ReleaseID will equal AlbumID (AlbumID is # releasegroup id) @@ -599,6 +599,11 @@ def dbcheck(): except sqlite3.OperationalError: c.execute('ALTER TABLE artists ADD COLUMN Type TEXT DEFAULT NULL') + try: + c.execute('SELECT MetaCritic from artists') + except sqlite3.OperationalError: + c.execute('ALTER TABLE artists ADD COLUMN MetaCritic TEXT DEFAULT NULL') + conn.commit() c.close() diff --git a/headphones/importer.py b/headphones/importer.py index dc6e490b..81b6bf2a 100644 --- a/headphones/importer.py +++ b/headphones/importer.py @@ -496,7 +496,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): cache.getThumb(ArtistID=artistid) logger.info(u"Fetching Metacritic reviews for: %s" % artist['artist_name']) - metacritic.update(artist['artist_name'], artist['releasegroups']) + metacritic.update(artistid, artist['artist_name'], artist['releasegroups']) if errors: logger.info("[%s] Finished updating artist: %s but with errors, so not marking it as updated in the database" % (artist['artist_name'], artist['artist_name'])) diff --git a/headphones/metacritic.py b/headphones/metacritic.py index 959282ca..21478800 100644 --- a/headphones/metacritic.py +++ b/headphones/metacritic.py @@ -14,16 +14,17 @@ # along with Headphones. If not, see <http://www.gnu.org/licenses/>. import re +import json import headphones from headphones import db, helpers, logger, request from headphones.common import USER_AGENT -def update(artist_name,release_groups): +def update(artistid, artist_name,release_groups): """ Pretty simple and crude function to find the artist page on metacritic, then parse that page to get critic & user scores for albums""" - # First let's modify the artist name to fit the metacritic convention. + # First let's modify the artist name to fit the metacritic convention. # We could just do a search, then take the top result, but at least this will # cut down on api calls. 
If it's ineffective then we'll switch to search @@ -38,24 +39,49 @@ def update(artist_name,release_groups): res = request.request_soup(url, headers=headers, parser='html.parser') + rows = None + try: rows = res.tbody.find_all('tr') except: logger.info("Unable to get metacritic scores for: %s" % artist_name) - return myDB = db.DBConnection() + artist = myDB.action('SELECT * FROM artists WHERE ArtistID=?', [artistid]).fetchone() - for row in rows: - title = row.a.string + score_list = [] + + # If we couldn't get anything from MetaCritic for whatever reason, + # let's try to load scores from the db + if not rows: + if artist['MetaCritic']: + score_list = json.loads(artist['MetaCritic']) + else: + return + + # If we did get scores, let's update the db with them + else: + for row in rows: + title = row.a.string + scores = row.find_all("span") + critic_score = scores[0].string + user_score = scores[1].string + score_dict = {'title':title,'critic_score':critic_score,'user_score':user_score} + score_list.append(score_dict) + + # Save scores to the database + controlValueDict = {"ArtistID": artistid} + newValueDict = {'MetaCritic':json.dumps(score_list)} + myDB.upsert("artists", newValueDict, controlValueDict) + + for score in score_list: + title = score['title'] + # Iterate through the release groups we got passed to see if we can find + # a match for rg in release_groups: if rg['title'].lower() == title.lower(): - scores = row.find_all("span") - critic_score = scores[0].string - user_score = scores[1].string + critic_score = score['critic_score'] + user_score = score['user_score'] controlValueDict = {"AlbumID": rg['id']} newValueDict = {'CriticScore':critic_score,'UserScore':user_score} myDB.upsert("albums", newValueDict, controlValueDict) - - -