Files
headphones/headphones/searcher.py
T

460 lines
19 KiB
Python

import urllib, urllib2
import lib.feedparser as feedparser
from xml.dom import minidom
from xml.parsers.expat import ExpatError
import os, re, time
import headphones
from headphones import logger, db, helpers, classes, sab
class NewzbinDownloader(urllib.FancyURLopener):
def __init__(self):
urllib.FancyURLopener.__init__(self)
def http_error_default(self, url, fp, errcode, errmsg, headers):
# if newzbin is throttling us, wait seconds and try again
if errcode == 400:
newzbinErrCode = int(headers.getheader('X-DNZB-RCode'))
if newzbinErrCode == 450:
rtext = str(headers.getheader('X-DNZB-RText'))
result = re.search("wait (\d+) seconds", rtext)
elif newzbinErrCode == 401:
logger.info("Newzbin error 401")
#raise exceptions.AuthException("Newzbin username or password incorrect")
elif newzbinErrCode == 402:
#raise exceptions.AuthException("Newzbin account not premium status, can't download NZBs")
logger.info("Newzbin error 402")
logger.info("Newzbin throttled our NZB downloading, pausing for " + result.group(1) + "seconds")
time.sleep(int(result.group(1)))
#raise exceptions.NewzbinAPIThrottled()
#this should be in a class somewhere
def getNewzbinURL(url):
myOpener = classes.AuthURLOpener(headphones.NEWZBIN_UID, headphones.NEWZBIN_PASSWORD)
try:
f = myOpener.openit(url)
except (urllib.ContentTooShortError, IOError), e:
logger.info("Error loading search results: ContentTooShortError ")
return None
data = f.read()
f.close()
return data
def searchNZB(albumid=None, new=False):
myDB = db.DBConnection()
if albumid:
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" AND AlbumID=?', [albumid])
else:
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted"')
new = True
for albums in results:
albumid = albums[2]
reldate = albums[3]
try:
year = reldate[:4]
except TypeError:
year = ''
dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':''}
cleanartistalbum = helpers.latinToAscii(helpers.replace_all(albums[0]+' '+albums[1], dic))
# FLAC usually doesn't have a year for some reason so I'll leave it out:
term = re.sub('[\.\-\/]', ' ', '%s' % (cleanartistalbum)).encode('utf-8')
altterm = re.sub('[\.\-\/]', ' ', '%s %s' % (cleanartistalbum, year)).encode('utf-8')
# Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4:
term = altterm
logger.info("Searching for %s since it was marked as wanted" % term)
resultlist = []
if headphones.NZBMATRIX:
provider = "nzbmatrix"
if headphones.PREFERRED_QUALITY == 3:
categories = "23"
maxsize = 10000000000
elif headphones.PREFERRED_QUALITY:
categories = "23,22"
maxsize = 2000000000
else:
categories = "22"
maxsize = 300000000
params = { "page": "download",
"username": headphones.NZBMATRIX_USERNAME,
"apikey": headphones.NZBMATRIX_APIKEY,
"subcat": categories,
"age": headphones.USENET_RETENTION,
"english": 1,
"ssl": 1,
"scenename": 1,
"term": term
}
searchURL = "http://rss.nzbmatrix.com/rss.php?" + urllib.urlencode(params)
logger.info(u"Parsing results from "+searchURL)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from NZBMatrix: %s' % e)
data = False
if data:
d = feedparser.parse(data)
for item in d.entries:
try:
url = item.link
title = item.title
size = int(item.links[1]['length'])
if size < maxsize:
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('%s is larger than the maxsize for this category, skipping. (Size: %i bytes)' % (title, size))
except AttributeError, e:
logger.info(u"No results found from NZBMatrix for %s" % term)
if headphones.NEWZNAB:
provider = "newznab"
if headphones.PREFERRED_QUALITY == 3:
categories = "3040"
maxsize = 10000000000
elif headphones.PREFERRED_QUALITY:
categories = "3040,3010"
maxsize = 2000000000
else:
categories = "3010"
maxsize = 300000000
params = { "t": "search",
"apikey": headphones.NEWZNAB_APIKEY,
"cat": categories,
"maxage": headphones.USENET_RETENTION,
"q": term
}
searchURL = headphones.NEWZNAB_HOST + '/api?' + urllib.urlencode(params)
logger.info(u"Parsing results from "+searchURL)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from %s: %s' % (headphones.NEWZNAB_HOST, e))
data = False
if data:
d = feedparser.parse(data)
if not len(d.entries):
logger.info(u"No results found from %s for %s" % (headphones.NEWZNAB_HOST, term))
pass
else:
for item in d.entries:
try:
url = item.link
title = item.title
size = int(item.links[1]['length'])
if size < maxsize:
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('%s is larger than the maxsize for this category, skipping. (Size: %i bytes)' % (title, size))
except Exception, e:
logger.error(u"An unknown error occured trying to parse the feed: %s" % e)
if headphones.NZBSORG:
provider = "nzbsorg"
if headphones.PREFERRED_QUALITY == 3:
categories = "5"
maxsize = 10000000000
term = term + ' flac'
elif headphones.PREFERRED_QUALITY:
categories = "5"
maxsize = 2000000000
else:
categories = "5"
maxsize = 300000000
params = { "action": "search",
"dl": 1,
"catid": categories,
"i": headphones.NZBSORG_UID,
"h": headphones.NZBSORG_HASH,
"age": headphones.USENET_RETENTION,
"q": term
}
searchURL = 'https://secure.nzbs.org/rss.php?' + urllib.urlencode(params)
logger.info(u"Parsing results from "+searchURL)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from NZBs.org: %s' % e)
data = False
items = False
if data:
try:
d = minidom.parseString(data)
node = d.documentElement
items = d.getElementsByTagName("item")
except ExpatError:
logger.error('Unable to get the NZBs.org feed. Check that your settings are correct - post a bug if they are')
items = None
if items:
for item in items:
sizenode = item.getElementsByTagName("report:size")[0].childNodes
titlenode = item.getElementsByTagName("title")[0].childNodes
linknode = item.getElementsByTagName("link")[0].childNodes
for node in sizenode:
size = int(node.data)
for node in titlenode:
title = node.data
for node in linknode:
url = node.data
if size < maxsize:
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('%s is larger than the maxsize for this category, skipping. (Size: %i bytes)' % (title, size))
else:
logger.info('No results found from NZBs.org for %s' % term)
if headphones.NEWZBIN:
provider = "newzbin"
providerurl = "https://www.newzbin.com/"
if headphones.PREFERRED_QUALITY == 3:
categories = "7" #music
format = "2" #flac
maxsize = 10000000000
elif headphones.PREFERRED_QUALITY:
categories = "7" #music
format = "10" #mp3+flac
maxsize = 2000000000
else:
categories = "7" #music
format = "8" #mp3
maxsize = 300000000
params = {
"fpn": "p",
'u_nfo_posts_only': 0,
'u_url_posts_only': 0,
'u_comment_posts_only': 0,
'u_show_passworded': 0,
"searchaction": "Search",
#"dl": 1,
"category": categories,
"retention": headphones.USENET_RETENTION,
"ps_rb_audio_format": format,
"feed": "rss",
"u_post_results_amt": 50, #this can default to a high number per user
"hauth": 1,
"q": term
}
searchURL = providerurl + "search/?%s" % urllib.urlencode(params)
data = getNewzbinURL(searchURL)
if data:
logger.info(u"Parsing results from "+searchURL)
try:
d = minidom.parseString(data)
node = d.documentElement
items = d.getElementsByTagName("item")
except ExpatError:
logger.info('Unable to get the NEWZBIN feed. Check that your settings are correct - post a bug if they are')
items = None
if len(items):
for item in items:
sizenode = item.getElementsByTagName("report:size")[0].childNodes
titlenode = item.getElementsByTagName("title")[0].childNodes
linknode = item.getElementsByTagName("link")[0].childNodes
for node in sizenode:
size = int(node.data)
for node in titlenode:
title = node.data
for node in linknode:
url = node.data
#exract the reportid from the link nodes
id_regex = re.escape(providerurl) + 'browse/post/(\d+)/'
id_match = re.match(id_regex, url)
if not id_match:
logger.info("Didn't find a valid Newzbin reportid in linknode")
else:
url = id_match.group(1) #we have to make a post request later, need the id
if size < maxsize and url:
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('%s is larger than the maxsize for this category, skipping. (Size: %i bytes)' % (title, size))
else:
logger.info('No results found from NEWZBIN for %s' % term)
if len(resultlist):
if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE)
tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid])
try:
albumlength = sum([pair[0] for pair in tracks])
logger.debug('Album length = %s' % albumlength)
targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128
logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize))
newlist = []
for result in resultlist:
delta = abs(targetsize - result[1])
newlist.append((result[0], result[1], result[2], delta))
nzblist = sorted(newlist, key=lambda title: title[3])
except Exception, e:
logger.debug('Error: %s' % str(e))
logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1]))
nzblist = sorted(resultlist, key=lambda title: title[1], reverse=True)
else:
nzblist = sorted(resultlist, key=lambda title: title[1], reverse=True)
if new:
# Checks to see if it's already downloaded
i = 0
while i < len(nzblist):
alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [nzblist[i][2]])
if len(alreadydownloaded) >= 1:
logger.info('%s has already been downloaded. Skipping.' % nzblist[i][0])
i += 1
else:
bestqual = nzblist[i]
break
try:
x = bestqual[0]
except UnboundLocalError:
logger.info('No more matches for %s' % term)
return
else:
bestqual = nzblist[0]
logger.info(u"Found best result: %s (%s) - %s" % (bestqual[0], bestqual[2], helpers.bytes_to_mb(bestqual[1])))
if bestqual[3] == "newzbin":
#logger.info("Found a newzbin result")
reportid = bestqual[2]
params = urllib.urlencode({"username": headphones.NEWZBIN_UID, "password": headphones.NEWZBIN_PASSWORD, "reportid": reportid})
url = providerurl + "/api/dnzb/"
urllib._urlopener = NewzbinDownloader()
data = urllib.urlopen(url, data=params).read()
nzb = classes.NZBDataSearchResult()
nzb.extraInfo.append(data)
nzb_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year)
nzb.name = nzb_folder_name
logger.info(u"Sending FILE to SABNZBD: " + nzb.name)
sab.sendNZB(nzb)
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name])
else:
downloadurl = bestqual[2]
nzb_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year)
if headphones.SAB_HOST and not headphones.BLACKHOLE:
linkparams = {}
linkparams["mode"] = "addurl"
if headphones.SAB_APIKEY:
linkparams["apikey"] = headphones.SAB_APIKEY
if headphones.SAB_USERNAME:
linkparams["ma_username"] = headphones.SAB_USERNAME
if headphones.SAB_PASSWORD:
linkparams["ma_password"] = headphones.SAB_PASSWORD
if headphones.SAB_CATEGORY:
linkparams["cat"] = headphones.SAB_CATEGORY
linkparams["name"] = downloadurl
linkparams["nzbname"] = nzb_folder_name
saburl = 'http://' + headphones.SAB_HOST + '/sabnzbd/api?' + urllib.urlencode(linkparams)
logger.info(u"Sending link to SABNZBD: " + saburl)
try:
urllib.urlopen(saburl)
except:
logger.error(u"Unable to send link. Are you sure the host address is correct?")
break
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name])
elif headphones.BLACKHOLE:
nzb_name = nzb_folder_name + '.nzb'
download_path = os.path.join(headphones.BLACKHOLE_DIR, nzb_name)
try:
urllib.urlretrieve(downloadurl, download_path)
except Exception, e:
logger.error('Couldn\'t retrieve NZB: %s' % e)
break
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name])