From a2708ec9641045bfa972d4f201fd3ef100aa45b7 Mon Sep 17 00:00:00 2001 From: sbuser Date: Fri, 5 Aug 2011 18:59:56 -0500 Subject: [PATCH 1/3] Attempt to weed out substring results in searcher. --- headphones/searcher.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index 89a2fc93..ee1aa2eb 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -75,10 +75,12 @@ def searchNZB(albumid=None, new=False): dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':''} cleanartistalbum = helpers.latinToAscii(helpers.replace_all(albums[0]+' '+albums[1], dic)) + cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic)) # FLAC usually doesn't have a year for some reason so I'll leave it out: term = re.sub('[\.\-\/]', ' ', '%s' % (cleanartistalbum)).encode('utf-8') altterm = re.sub('[\.\-\/]', ' ', '%s %s' % (cleanartistalbum, year)).encode('utf-8') + artistterm = re.sub('[\.\-\/]', ' ', '%s' % (cleanartist)).encode('utf-8') # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4: @@ -331,9 +333,22 @@ def searchNZB(albumid=None, new=False): else: logger.info('No results found from NEWZBIN for %s' % term) + + #attempt to verify that this isn't a substring result + #when looking for "Avant - Avant" we don't want "Avantasia" + #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist + #for reasons I can't explain this still fails on "Lavanttaler" - maybe you have a better regex? + if resultlist: + for result in resultlist: + if re.search('\w' + re.escape(artistterm), result[0], re.IGNORECASE): + logger.info("Removed from results: " + result[0] + " (substring result).") + resultlist.remove(result) + elif re.search(re.escape(artistterm) + '\w', result[0], re.IGNORECASE): + logger.info("Removed from results: " + result[0] + " (substring result).") + resultlist.remove(result) if len(resultlist): - + if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE) From 69c98976a9ffc3e4c2d49e4d7205520e7991c334 Mon Sep 17 00:00:00 2001 From: Lyle Gordon Date: Sat, 6 Aug 2011 07:00:26 +0800 Subject: [PATCH 2/3] added solaris SMF manifest --- headphones.xml | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 headphones.xml diff --git a/headphones.xml b/headphones.xml new file mode 100644 index 00000000..4f1310cd --- /dev/null +++ b/headphones.xml @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 7a013e818dee608fa0081fbc92be52410128fcd0 Mon Sep 17 00:00:00 2001 From: sbuser Date: Sat, 6 Aug 2011 09:50:59 -0500 Subject: [PATCH 3/3] Attempt to weed out substring results. When searching for "Foo" we do not want "Foobar" --- headphones/importer.py | 4 +++- headphones/searcher.py | 28 +++++++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/headphones/importer.py b/headphones/importer.py index c3c6720d..a64e959e 100644 --- a/headphones/importer.py +++ b/headphones/importer.py @@ -329,4 +329,6 @@ def addReleaseById(rid): #start a search for the album import searcher - searcher.searchNZB(rgid, False) \ No newline at end of file + searcher.searchNZB(rgid, False) + else: + logger.info('Release ' + str(rid) + " already exists in the database!") \ No newline at end of file diff --git a/headphones/searcher.py b/headphones/searcher.py index ee1aa2eb..973c8143 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -335,17 +335,10 @@ def searchNZB(albumid=None, new=False): logger.info('No results found from NEWZBIN for %s' % term) #attempt to verify that this isn't a substring result - #when looking for "Avant - Avant" we don't want "Avantasia" + #when looking for "Foo - Foo" we don't want "Foobar" #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist - #for reasons I can't explain this still fails on "Lavanttaler" - maybe you have a better regex? - if resultlist: - for result in resultlist: - if re.search('\w' + re.escape(artistterm), result[0], re.IGNORECASE): - logger.info("Removed from results: " + result[0] + " (substring result).") - resultlist.remove(result) - elif re.search(re.escape(artistterm) + '\w', result[0], re.IGNORECASE): - logger.info("Removed from results: " + result[0] + " (substring result).") - resultlist.remove(result) + if len(resultlist): + resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm)] if len(resultlist): @@ -472,4 +465,17 @@ def searchNZB(albumid=None, new=False): break myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]]) - myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name]) \ No newline at end of file + myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name]) + +def verifyresult(title, term): + if not re.search('^' + re.escape(term), title, re.IGNORECASE): + logger.info("Removed from results: " + title + " (artist not at string start).") + return False + elif re.search(re.escape(term) + '\w', title, re.IGNORECASE | re.UNICODE): + logger.info("Removed from results: " + title + " (post substring result).") + return False + elif re.search('\w' + re.escape(term), title, re.IGNORECASE | re.UNICODE): + logger.info("Removed from results: " + title + " (pre substring result).") + return False + else: + return True