From b32b8ecf6b227954279cc09dc62cff8454dbb84a Mon Sep 17 00:00:00 2001 From: rembo10 Date: Thu, 22 Mar 2012 20:48:44 +0000 Subject: [PATCH] Added another token check that replaces punctuation with their relative ascii counterparts --- headphones/searcher.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index 61099f88..c72e7718 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -495,18 +495,23 @@ def verifyresult(title, artistterm, term): # return False #another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II" - tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE) - for token in tokens: - cleantoken = ''.join(c for c in token if c not in string.punctuation) - if not token: - continue - if token == 'Various' or token == 'Artists' or token == 'VA': - continue - if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): - if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): - logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")") - return False - return True + tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE) + for token in tokens: + + if not token: + continue + if token == 'Various' or token == 'Artists' or token == 'VA': + continue + if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): + cleantoken = ''.join(c for c in token if c not in string.punctuation) + if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): + dic = {'!':'i', '$':'s'} + dumbtoken = helpers.replace_all(token, dic)) + if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): + logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")") + return False + + return True def getresultNZB(result):