From 74b8a88f29b6971eb795777fa88fa5d823822a76 Mon Sep 17 00:00:00 2001 From: sbuser Date: Wed, 10 Aug 2011 20:06:09 -0500 Subject: [PATCH 1/4] More substring result prevention. This time on the full term to prevent snatches on "Vol III" when we want "Vol II." --- headphones/searcher.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index c08ed287..c9f0fc68 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -345,7 +345,7 @@ def searchNZB(albumid=None, new=False): #when looking for "Foo - Foo" we don't want "Foobar" #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist if len(resultlist): - resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm)] + resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term)] if len(resultlist): @@ -433,24 +433,31 @@ def searchNZB(albumid=None, new=False): myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]]) myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name]) -def verifyresult(title, term): +def verifyresult(title, artistterm, term): title = re.sub('[\.\-\/\_]', ' ', title) - if term == 'Various Artists': - return True - - if not re.search('^' + re.escape(term), title, re.IGNORECASE): - logger.info("Removed from results: " + title + " (artist not at string start).") - return False - elif re.search(re.escape(term) + '\w', title, re.IGNORECASE | re.UNICODE): - logger.info("Removed from results: " + title + " (post substring result).") - return False - elif re.search('\w' + re.escape(term), title, re.IGNORECASE | re.UNICODE): - logger.info("Removed from results: " + title + " (pre substring result).") - return False - else: - return True + if artistterm <> 'Various Artists': + + if not re.search('^' + re.escape(artistterm), title, re.IGNORECASE): + logger.info("Removed from results: " + title + " (artist not at string start).") + return False + elif re.search(re.escape(artistterm) + '\w', title, re.IGNORECASE | re.UNICODE): + logger.info("Removed from results: " + title + " (post substring result).") + return False + elif re.search('\w' + re.escape(artistterm), title, re.IGNORECASE | re.UNICODE): + logger.info("Removed from results: " + title + " (pre substring result).") + return False + + #another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II" + tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE) + for token in tokens: + if token == 'Various' or token == 'Artists' or token == 'VA': + continue + if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): + logger.info("Removed from results: " + title + " (missing token: " + token + ")") + return False + return True def getresultNZB(result): if result[3] == 'newzbin': From 976311613b4e50031954805ab0b2d1dc348fce9d Mon Sep 17 00:00:00 2001 From: sbuser Date: Wed, 10 Aug 2011 20:11:14 -0500 Subject: [PATCH 2/4] Added '*' to the list of characters removed from search terms. This is a wildcard at NZBMatrix that was returning 100 hits. --- headphones/searcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index c9f0fc68..2ac8015a 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -70,7 +70,7 @@ def searchNZB(albumid=None, new=False): except TypeError: year = '' - dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':''} + dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':''} cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic)) cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic)) From c2768ae1f06e3a9a487014909773eb65bf45f709 Mon Sep 17 00:00:00 2001 From: sbuser Date: Wed, 10 Aug 2011 20:37:30 -0500 Subject: [PATCH 3/4] Added special character '*' to list of exceptions in mb.findArtist query. Why are you only quoting the queries with special characters? Better results are had with quoting every time because of how lucene works imo --- headphones/mb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/mb.py b/headphones/mb.py index 1d6b20cd..7acb339f 100644 --- a/headphones/mb.py +++ b/headphones/mb.py @@ -24,7 +24,7 @@ def findArtist(name, limit=1): attempt = 0 artistResults = None - chars = set('!?') + chars = set('!?*') if any((c in chars) for c in name): name = '"'+name+'"' From 05ee998de71cb307e58a98e62879e3735166d376 Mon Sep 17 00:00:00 2001 From: sbuser Date: Wed, 10 Aug 2011 20:48:09 -0500 Subject: [PATCH 4/4] Error message for people who don't set their download directory and then wonder why there's no post-processing. --- headphones/postprocessor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index d8b8cd14..a7c4a737 100644 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -442,6 +442,7 @@ def renameUnprocessedFolder(albumpath): def forcePostProcess(): if not headphones.DOWNLOAD_DIR: + logger.error('No DOWNLOAD_DIR has been set. Set "Music Download Directory:" to your SAB download directory on the settings page.') return else: download_dir = headphones.DOWNLOAD_DIR