Added another token check that replaces punctuation characters with their look-alike ASCII letters (e.g. '!' -> 'i', '$' -> 's')

This commit is contained in:
rembo10
2012-03-22 20:48:44 +00:00
parent 651dfaf6a4
commit b32b8ecf6b

View File

@@ -495,18 +495,23 @@ def verifyresult(title, artistterm, term):
# return False
#another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II"
tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
for token in tokens:
cleantoken = ''.join(c for c in token if c not in string.punctuation)
if not token:
continue
if token == 'Various' or token == 'Artists' or token == 'VA':
continue
if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")")
return False
return True
tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
for token in tokens:
if not token:
continue
if token == 'Various' or token == 'Artists' or token == 'VA':
continue
if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
cleantoken = ''.join(c for c in token if c not in string.punctuation)
if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
dic = {'!':'i', '$':'s'}
dumbtoken = helpers.replace_all(token, dic))
if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")")
return False
return True
def getresultNZB(result):