Fixed a problem with token matching for words containing strange characters/punctuation in the stripped search results

This commit is contained in:
rembo10
2012-03-22 20:27:53 +00:00
parent dfffe542be
commit 651dfaf6a4

View File

@@ -6,6 +6,7 @@ from StringIO import StringIO
import gzip
import os, re, time
import string
import headphones, exceptions
from headphones import logger, db, helpers, classes, sab
@@ -496,14 +497,15 @@ def verifyresult(title, artistterm, term):
#another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II"
tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
for token in tokens:
cleantoken = ''.join(c for c in token if c not in string.punctuation)
if not token:
continue
if token == 'Various' or token == 'Artists' or token == 'VA':
continue
if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
if not re.search('(?:\W|^)+' + token.replace("'","") + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + token.replace("'","") + ")")
return False
if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")")
return False
return True
def getresultNZB(result):