Fixed a problem with token matching for words containing strange characters/punctuation in the stripped search results

This commit is contained in:
rembo10
2012-03-22 20:27:53 +00:00
parent dfffe542be
commit 651dfaf6a4

View File

@@ -6,6 +6,7 @@ from StringIO import StringIO
import gzip
import os, re, time
import string
import headphones, exceptions
from headphones import logger, db, helpers, classes, sab
@@ -496,14 +497,15 @@ def verifyresult(title, artistterm, term):
#another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II"
tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
for token in tokens:
cleantoken = ''.join(c for c in token if c not in string.punctuation)
if not token:
continue
if token == 'Various' or token == 'Artists' or token == 'VA':
continue
if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
if not re.search('(?:\W|^)+' + token.replace("'","") + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + token.replace("'","") + ")")
return False
if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")")
return False
return True
def getresultNZB(result):