Attempt to weed out substring results. When searching for "Foo" we do not want "Foobar".
This commit is contained in:
sbuser
2011-08-06 09:50:59 -05:00
parent a2708ec964
commit 7a013e818d
2 changed files with 20 additions and 12 deletions

View File

@@ -329,4 +329,6 @@ def addReleaseById(rid):
#start a search for the album
import searcher
searcher.searchNZB(rgid, False)
searcher.searchNZB(rgid, False)
else:
logger.info('Release ' + str(rid) + " already exists in the database!")

View File

@@ -335,17 +335,10 @@ def searchNZB(albumid=None, new=False):
logger.info('No results found from NEWZBIN for %s' % term)
#attempt to verify that this isn't a substring result
#when looking for "Avant - Avant" we don't want "Avantasia"
#when looking for "Foo - Foo" we don't want "Foobar"
#this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
#for reasons I can't explain this still fails on "Lavanttaler" - maybe you have a better regex?
if resultlist:
for result in resultlist:
if re.search('\w' + re.escape(artistterm), result[0], re.IGNORECASE):
logger.info("Removed from results: " + result[0] + " (substring result).")
resultlist.remove(result)
elif re.search(re.escape(artistterm) + '\w', result[0], re.IGNORECASE):
logger.info("Removed from results: " + result[0] + " (substring result).")
resultlist.remove(result)
if len(resultlist):
resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm)]
if len(resultlist):
@@ -472,4 +465,17 @@ def searchNZB(albumid=None, new=False):
break
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name])
def verifyresult(title, term):
    """Return True if *title* is an acceptable match for the search *term*.

    A title is accepted only when all of the following hold (all checks are
    case-insensitive and Unicode-aware):

    * *term* appears at the very start of *title*;
    * no occurrence of *term* in *title* is immediately followed by a word
      character (searching for "Foo" must not accept "Foobar");
    * no occurrence of *term* in *title* is immediately preceded by a word
      character (searching for "Foo" must not accept "BigFoo").

    Every rejection is reported through ``logger.info`` with the reason.

    title -- the result title to check.
    term  -- the literal text to look for (the log messages refer to it as
             the artist); it is escaped, so regex metacharacters in it are
             matched literally.
    """
    # Use the same flags for every check so that non-ASCII names are
    # case-folded consistently by all three tests.
    flags = re.IGNORECASE | re.UNICODE
    escaped = re.escape(term)

    if not re.search(r'^' + escaped, title, flags):
        logger.info("Removed from results: " + title + " (artist not at string start).")
        return False

    if re.search(escaped + r'\w', title, flags):
        logger.info("Removed from results: " + title + " (post substring result).")
        return False

    # The title is known to start with the term here, so this can only fire
    # on a later occurrence of the term that is glued to a preceding word.
    if re.search(r'\w' + escaped, title, flags):
        logger.info("Removed from results: " + title + " (pre substring result).")
        return False

    return True