Added searcher words to the config page, added required & ignored words code to searcher.py, got rid of old newzbin & nzbmatrix code in searcher.py

This commit is contained in:
rembo10
2013-01-19 07:17:48 -05:00
parent 1c1e2e0bdc
commit 65d4181f89
3 changed files with 40 additions and 171 deletions

View File

@@ -362,6 +362,25 @@
</div>
</fieldset>
<fieldset>
<legend>Search Words</legend>
<small>Separate words with a comma, e.g. "word1,word2,word3"</small>
<div class="row">
<label>Ignored Words</label>
<input type="text" name="ignored_words" value="${config['ignored_words']}" size="50">
<small>Results with any of these words in the title will be filtered out</small>
</div>
<div class="row">
<label>Preferred Words</label>
<input type="text" name="preferred_words" value="${config['preferred_words']}" size="50">
<small>Results with these words in the title will be preferred over results without them</small>
</div>
<div class="row">
<label>Required Words</label>
<input type="text" name="required_words" value="${config['required_words']}" size="50">
<small>Results without these words in the title will be filtered out</small>
</div>
</fieldset>
</td>
<td>
<fieldset>
@@ -394,31 +413,6 @@
</fieldset>
</td>
</tr>
<tr>
<td>
<fieldset>
<legend>Quality</legend>
<div class="row radio clearfix">
<input type="radio" name="preferred_quality" value="0" ${config['pref_qual_0']} /><label>Highest Quality excluding Lossless</label>
<input type="radio" name="preferred_quality" value="1" ${config['pref_qual_1']} /><label>Highest Quality including Lossless</label>
<input type="radio" name="preferred_quality" value="3" ${config['pref_qual_3']} /><label>Lossless Only</label>
<input type="radio" id="preferred_bitrate" name="preferred_quality" value="2" ${config['pref_qual_2']} />Preferred Bitrate: <input type="text" class="override-float" name="preferred_bitrate" value="${config['pref_bitrate']}" size="3">kbps<br>
<div id="preferred_bitrate_options">
Reject if <strong>less than</strong> <input type="text" class="override-float" name="preferred_bitrate_low_buffer" value="${config['pref_bitrate_low']}" size="3">% or <strong>more than</strong> <input type="text" class="override-float" name="preferred_bitrate_high_buffer" value="${config['pref_bitrate_high']}" size="3">% of the target size (leave blank for no limit)<br><br>
<div class="row checkbox left">
<input type="checkbox" name="preferred_bitrate_allow_lossless" value="1" ${config['pref_bitrate_allow_lossless']} />
<label>Allow lossless if no good lossy match found</label>
</div>
<div class="row checkbox left">
<input type="checkbox" name="detect_bitrate" value="1" ${config['detect_bitrate']} />
<label>Auto-Detect Preferred Bitrate</label>
</div>
</div>
</div>
</fieldset>
</td>
</tr>
</table>
<input type="button" class="configsubmit" value="Save Changes" onclick="doAjaxCall('configUpdate',$(this),'tabs',true);return false;" data-success="Changes saved successfully">
</div>

View File

@@ -322,3 +322,9 @@ def sab_sanitize_foldername(name):
# name = name[:maxlen]
return name
def split_string(mystring):
    """Split a comma-separated string into a list of trimmed, non-empty words.

    mystring: text such as "word1, word2,word3". An empty string or None
    yields an empty list.

    Returns a list of the entries in their original order, each with
    surrounding whitespace removed. Entries that are empty after trimming
    (produced by leading, trailing or doubled commas) are dropped: an empty
    string is a substring of every other string, so leaving it in would make
    any "word in title" style check match everything.
    """
    if not mystring:
        return []

    trimmed = (each_word.strip() for each_word in mystring.split(','))
    return [each_word for each_word in trimmed if each_word]

View File

@@ -109,7 +109,7 @@ def searchforalbum(albumid=None, new=False, lossless=False):
for result in results:
foundNZB = "none"
if (headphones.NZBMATRIX or headphones.NEWZNAB or headphones.NZBSORG or headphones.NEWZBIN or headphones.NZBX or headphones.NZBSRUS) and (headphones.SAB_HOST or headphones.BLACKHOLE):
if (headphones.NEWZNAB or headphones.NZBSORG or headphones.NZBX or headphones.NZBSRUS) and (headphones.SAB_HOST or headphones.BLACKHOLE):
if result['Status'] == "Wanted Lossless":
foundNZB = searchNZB(result['AlbumID'], new, losslessOnly=True)
else:
@@ -179,66 +179,6 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
logger.info("Searching for %s since it was marked as wanted" % term)
resultlist = []
# if headphones.NZBMATRIX:
# provider = "nzbmatrix"
# if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
# categories = "23"
# elif headphones.PREFERRED_QUALITY:
# categories = "23,22"
# else:
# categories = "22"
#
# # Search Audiobooks/Singles/etc
# if albums['Type'] == "Other":
# categories = "49"
# logger.info("Album type is audiobook/spokenword. Using audiobook category")
# if albums['Type'] == "Single":
# categories = "47"
# logger.info("Album type is 'Single'. Using singles category")
#
# # For some reason NZBMatrix is erroring out/timing out when the term starts with a "The" right now
# # so we'll strip it out for the time being. This may get fixed on their end, it may not, but
# # hopefully this will fix it for now. If you notice anything else it gets stuck on, please post it
# # on Github so it can be added
# if term.lower().startswith("the "):
# term = term[4:]
#
#
# params = { "page": "download",
# "username": headphones.NZBMATRIX_USERNAME,
# "apikey": headphones.NZBMATRIX_APIKEY,
# "subcat": categories,
# "maxage": headphones.USENET_RETENTION,
# "english": 1,
# "ssl": 1,
# "scenename": 1,
# "term": term
# }
#
# searchURL = "https://rss.nzbmatrix.com/rss.php?" + urllib.urlencode(params)
# logger.info(u'Parsing results from <a href="%s">NZBMatrix</a>' % searchURL)
# try:
# data = urllib2.urlopen(searchURL, timeout=20).read()
# except urllib2.URLError, e:
# logger.warn('Error fetching data from NZBMatrix: %s' % e)
# data = False
#
# if data:
#
# d = feedparser.parse(data)
#
# for item in d.entries:
# try:
# url = item.link
# title = item.title
# size = int(item.links[1]['length'])
#
# resultlist.append((title, size, url, provider))
# logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
#
# except AttributeError, e:
# logger.info(u"No results found from NZBMatrix for %s" % term)
if headphones.NEWZNAB:
@@ -476,88 +416,6 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
except Exception, e:
logger.error(u"An unknown error occurred trying to parse the feed: %s" % e)
# if headphones.NEWZBIN:
# provider = "newzbin"
# providerurl = "https://www.newzbin2.es/"
# if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
# categories = "7" #music
# format = "2" #flac
# elif headphones.PREFERRED_QUALITY:
# categories = "7" #music
# format = "10" #mp3+flac
# else:
# categories = "7" #music
# format = "8" #mp3
#
# if albums['Type'] == 'Other':
# categories = "13"
# format = "16"
# logger.info("Album type is audiobook/spokenword. Using audiobook category")
#
# params = {
# "fpn": "p",
# 'u_nfo_posts_only': 0,
# 'u_url_posts_only': 0,
# 'u_comment_posts_only': 0,
# 'u_show_passworded': 0,
# "searchaction": "Search",
# #"dl": 1,
# "category": categories,
# "retention": headphones.USENET_RETENTION,
# "ps_rb_audio_format": format,
# "feed": "rss",
# "u_post_results_amt": 50, #this can default to a high number per user
# "hauth": 1,
# "q": term
# }
# searchURL = providerurl + "search/?%s" % urllib.urlencode(params)
# try:
# data = getNewzbinURL(searchURL)
# except exceptions.NewzbinAPIThrottled:
# #try again if we were throttled
# data = getNewzbinURL(searchURL)
# if data:
# logger.info(u'Parsing results from <a href="%s">%s</a>' % (searchURL, providerurl))
#
# try:
# d = minidom.parseString(data)
# node = d.documentElement
# items = d.getElementsByTagName("item")
# except ExpatError:
# logger.info('Unable to get the NEWZBIN feed. Check that your settings are correct - post a bug if they are')
# items = []
#
# if len(items):
#
# for item in items:
#
# sizenode = item.getElementsByTagName("report:size")[0].childNodes
# titlenode = item.getElementsByTagName("title")[0].childNodes
# linknode = item.getElementsByTagName("link")[0].childNodes
#
# for node in sizenode:
# size = int(node.data)
# for node in titlenode:
# title = node.data
# for node in linknode:
# url = node.data
#
# #extract the reportid from the link nodes
# id_regex = re.escape(providerurl) + 'browse/post/(\d+)/'
# id_match = re.match(id_regex, url)
# if not id_match:
# logger.info("Didn't find a valid Newzbin reportid in linknode")
# else:
# url = id_match.group(1) #we have to make a post request later, need the id
# if url:
# resultlist.append((title, size, url, provider))
# logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
# else:
# logger.info('No url link found in nzb. Skipping.')
#
# else:
# logger.info('No results found from NEWZBIN for %s' % term)
#
#attempt to verify that this isn't a substring result
#when looking for "Foo - Foo" we don't want "Foobar"
#this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
@@ -616,7 +474,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
nzblist = sorted(newlist, key=lambda title: title[4])
if not len(nzblist) and len(flac_list) and headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS:
logger.info("Since there were no appropriate lossy matches, going to use lossless instead")
logger.info("Since there were no appropriate lossy matches (and at least one lossless match), going to use lossless instead")
nzblist = sorted(flac_list, key=lambda title: title[1], reverse=True)
except Exception, e:
@@ -670,8 +528,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
# If we sent the file to sab, we can check how it was renamed and insert that into the snatched table
(replace_spaces, replace_dots) = sab.checkConfig()
print replace_spaces
print replace_dots
if replace_dots:
nzb_folder_name = helpers.sab_replace_dots(nzb_folder_name)
if replace_spaces:
@@ -727,6 +584,18 @@ def verifyresult(title, artistterm, term, lossless):
if headphones.PREFERRED_QUALITY == (0 or '0') and 'flac' in title.lower() and not lossless:
logger.info("Removed " + title + " from results because it's a lossless album and we're not looking for a lossless album right now")
return False
if headphones.IGNORED_WORDS:
for each_word in helpers.split_string(headphones.IGNORED_WORDS):
if each_word.lower() in title.lower():
logger.info("Removed " + title + " from results because it contains ignored word: '" + each_word + "'")
return False
if headphones.REQUIRED_WORDS:
for each_word in helpers.split_string(headphones.REQUIRED_WORDS):
if each_word.lower() not in title.lower():
logger.info("Removed " + title + " from results because it doesn't contain required word: '" + each_word + "'")
return False
tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
for token in tokens: