Files
headphones/headphones/searcher.py
rembo10 20bd6ae35f GPL v3
2012-06-19 13:02:12 +05:30

940 lines
42 KiB
Python

# This file is part of Headphones.
#
# Headphones is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Headphones is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import urllib, urllib2, urlparse
import lib.feedparser as feedparser
from xml.dom import minidom
from xml.parsers.expat import ExpatError
from StringIO import StringIO
import gzip
import os, re, time
import string
import headphones, exceptions
from headphones import logger, db, helpers, classes, sab
class NewzbinDownloader(urllib.FancyURLopener):
def __init__(self):
urllib.FancyURLopener.__init__(self)
def http_error_default(self, url, fp, errcode, errmsg, headers):
# if newzbin is throttling us, wait seconds and try again
if errcode == 400:
newzbinErrCode = int(headers.getheader('X-DNZB-RCode'))
if newzbinErrCode == 450:
rtext = str(headers.getheader('X-DNZB-RText'))
result = re.search("wait (\d+) seconds", rtext)
logger.info("Newzbin throttled our NZB downloading, pausing for " + result.group(1) + " seconds")
time.sleep(int(result.group(1)))
raise exceptions.NewzbinAPIThrottled()
elif newzbinErrCode == 401:
logger.info("Newzbin error 401")
#raise exceptions.AuthException("Newzbin username or password incorrect")
elif newzbinErrCode == 402:
#raise exceptions.AuthException("Newzbin account not premium status, can't download NZBs")
logger.info("Newzbin error 402")
#this should be in a class somewhere
def getNewzbinURL(url):
myOpener = classes.AuthURLOpener(headphones.NEWZBIN_UID, headphones.NEWZBIN_PASSWORD)
try:
f = myOpener.openit(url)
except (urllib.ContentTooShortError, IOError), e:
logger.info("Error loading search results: ContentTooShortError ")
return None
data = f.read()
f.close()
return data
def url_fix(s, charset='utf-8'):
if isinstance(s, unicode):
s = s.encode(charset, 'ignore')
scheme, netloc, path, qs, anchor = urlparse.urlsplit(s)
path = urllib.quote(path, '/%')
qs = urllib.quote_plus(qs, ':&=')
return urlparse.urlunsplit((scheme, netloc, path, qs, anchor))
def searchforalbum(albumid=None, new=False, lossless=False):
if not albumid:
myDB = db.DBConnection()
results = myDB.select('SELECT AlbumID, Status from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')
new = True
for result in results:
foundNZB = "none"
if (headphones.NZBMATRIX or headphones.NEWZNAB or headphones.NZBSORG or headphones.NEWZBIN) and (headphones.SAB_HOST or headphones.BLACKHOLE):
if result['Status'] == "Wanted Lossless":
foundNZB = searchNZB(result['AlbumID'], new, losslessOnly=True)
else:
foundNZB = searchNZB(result['AlbumID'], new)
if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA) and foundNZB == "none":
if result['Status'] == "Wanted Lossless":
searchTorrent(result['AlbumID'], new, losslessOnly=True)
else:
searchTorrent(result['AlbumID'], new)
else:
foundNZB = "none"
if (headphones.NZBMATRIX or headphones.NEWZNAB or headphones.NZBSORG or headphones.NEWZBIN) and (headphones.SAB_HOST or headphones.BLACKHOLE):
foundNZB = searchNZB(albumid, new, lossless)
if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA) and foundNZB == "none":
searchTorrent(albumid, new, lossless)
def searchNZB(albumid=None, new=False, losslessOnly=False):
myDB = db.DBConnection()
if albumid:
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE AlbumID=?', [albumid])
else:
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')
new = True
for albums in results:
albumid = albums[2]
reldate = albums[3]
try:
year = reldate[:4]
except TypeError:
year = ''
dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '.':'', ':':''}
cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic))
cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic))
# FLAC usually doesn't have a year for some reason so I'll leave it out
# Various Artist albums might be listed as VA, so I'll leave that out too
# Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4:
term = cleanartist + ' ' + cleanalbum + ' ' + year
elif albums[0] == 'Various Artists':
term = cleanalbum + ' ' + year
else:
term = cleanartist + ' ' + cleanalbum
# Replace bad characters in the term and unicode it
term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8')
logger.info("Searching for %s since it was marked as wanted" % term)
resultlist = []
if headphones.NZBMATRIX:
provider = "nzbmatrix"
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
categories = "23"
elif headphones.PREFERRED_QUALITY:
categories = "23,22"
else:
categories = "22"
# For some reason NZBMatrix is erroring out/timing out when the term starts with a "The" right now
# so we'll strip it out for the time being. This may get fixed on their end, it may not, but
# hopefully this will fix it for now. If you notice anything else it gets stuck on, please post it
# on Github so it can be added
if term.lower().startswith("the "):
term = term[4:]
params = { "page": "download",
"username": headphones.NZBMATRIX_USERNAME,
"apikey": headphones.NZBMATRIX_APIKEY,
"subcat": categories,
"maxage": headphones.USENET_RETENTION,
"english": 1,
"ssl": 1,
"scenename": 1,
"term": term
}
searchURL = "http://rss.nzbmatrix.com/rss.php?" + urllib.urlencode(params)
logger.info(u'Parsing results from <a href="%s">NZBMatrix</a>' % searchURL)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from NZBMatrix: %s' % e)
data = False
if data:
d = feedparser.parse(data)
for item in d.entries:
try:
url = item.link
title = item.title
size = int(item.links[1]['length'])
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
except AttributeError, e:
logger.info(u"No results found from NZBMatrix for %s" % term)
if headphones.NEWZNAB:
provider = "newznab"
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
categories = "3040"
elif headphones.PREFERRED_QUALITY:
categories = "3040,3010"
else:
categories = "3010"
params = { "t": "search",
"apikey": headphones.NEWZNAB_APIKEY,
"cat": categories,
"maxage": headphones.USENET_RETENTION,
"q": term
}
searchURL = headphones.NEWZNAB_HOST + '/api?' + urllib.urlencode(params)
logger.info(u'Parsing results from <a href="%s">%s</a>' % (searchURL, headphones.NEWZNAB_HOST))
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from %s: %s' % (headphones.NEWZNAB_HOST, e))
data = False
if data:
d = feedparser.parse(data)
if not len(d.entries):
logger.info(u"No results found from %s for %s" % (headphones.NEWZNAB_HOST, term))
pass
else:
for item in d.entries:
try:
url = item.link
title = item.title
size = int(item.links[1]['length'])
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
except Exception, e:
logger.error(u"An unknown error occured trying to parse the feed: %s" % e)
if headphones.NZBSORG:
provider = "nzbsorg"
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
categories = "3040"
elif headphones.PREFERRED_QUALITY:
categories = "3040,3010"
else:
categories = "3010"
params = { "t": "search",
"apikey": headphones.NZBSORG_HASH,
"cat": categories,
"maxage": headphones.USENET_RETENTION,
"q": term
}
searchURL = 'http://beta.nzbs.org/api?' + urllib.urlencode(params)
logger.info(u'Parsing results from <a href="%s">nzbs.org</a>' % searchURL)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from nzbs.org: %s' % e)
data = False
if data:
d = feedparser.parse(data)
if not len(d.entries):
logger.info(u"No results found from nzbs.org for %s" % term)
pass
else:
for item in d.entries:
try:
url = item.link
title = item.title
size = int(item.links[1]['length'])
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
except Exception, e:
logger.error(u"An unknown error occured trying to parse the feed: %s" % e)
if headphones.NEWZBIN:
provider = "newzbin"
providerurl = "https://www.newzbin2.es/"
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
categories = "7" #music
format = "2" #flac
elif headphones.PREFERRED_QUALITY:
categories = "7" #music
format = "10" #mp3+flac
else:
categories = "7" #music
format = "8" #mp3
params = {
"fpn": "p",
'u_nfo_posts_only': 0,
'u_url_posts_only': 0,
'u_comment_posts_only': 0,
'u_show_passworded': 0,
"searchaction": "Search",
#"dl": 1,
"category": categories,
"retention": headphones.USENET_RETENTION,
"ps_rb_audio_format": format,
"feed": "rss",
"u_post_results_amt": 50, #this can default to a high number per user
"hauth": 1,
"q": term
}
searchURL = providerurl + "search/?%s" % urllib.urlencode(params)
try:
data = getNewzbinURL(searchURL)
except exceptions.NewzbinAPIThrottled:
#try again if we were throttled
data = getNewzbinURL(searchURL)
if data:
logger.info(u'Parsing results from <a href="%s">%s</a>' % (searchURL, providerurl))
try:
d = minidom.parseString(data)
node = d.documentElement
items = d.getElementsByTagName("item")
except ExpatError:
logger.info('Unable to get the NEWZBIN feed. Check that your settings are correct - post a bug if they are')
items = []
if len(items):
for item in items:
sizenode = item.getElementsByTagName("report:size")[0].childNodes
titlenode = item.getElementsByTagName("title")[0].childNodes
linknode = item.getElementsByTagName("link")[0].childNodes
for node in sizenode:
size = int(node.data)
for node in titlenode:
title = node.data
for node in linknode:
url = node.data
#exract the reportid from the link nodes
id_regex = re.escape(providerurl) + 'browse/post/(\d+)/'
id_match = re.match(id_regex, url)
if not id_match:
logger.info("Didn't find a valid Newzbin reportid in linknode")
else:
url = id_match.group(1) #we have to make a post request later, need the id
if url:
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('No url link found in nzb. Skipping.')
else:
logger.info('No results found from NEWZBIN for %s' % term)
#attempt to verify that this isn't a substring result
#when looking for "Foo - Foo" we don't want "Foobar"
#this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
if len(resultlist):
resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term)]
if len(resultlist):
if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE)
tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid])
try:
albumlength = sum([pair[0] for pair in tracks])
targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128
logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize))
newlist = []
for result in resultlist:
delta = abs(targetsize - result[1])
newlist.append((result[0], result[1], result[2], result[3], delta))
nzblist = sorted(newlist, key=lambda title: title[4])
except Exception, e:
logger.debug('Error: %s' % str(e))
logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1]))
nzblist = sorted(resultlist, key=lambda title: title[1], reverse=True)
else:
nzblist = sorted(resultlist, key=lambda title: title[1], reverse=True)
if new:
while True:
if len(nzblist):
alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [nzblist[0][2]])
if len(alreadydownloaded):
logger.info('%s has already been downloaded. Skipping.' % nzblist[0][0])
nzblist.pop(0)
else:
break
else:
logger.info('No more results found for %s' % term)
return "none"
logger.info(u"Pre-processing result")
(data, bestqual) = preprocess(nzblist)
if data and bestqual:
logger.info(u'Found best result: <a href="%s">%s</a> - %s' % (bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])))
nzb_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year)
if headphones.SAB_HOST and not headphones.BLACKHOLE:
nzb = classes.NZBDataSearchResult()
nzb.extraInfo.append(data)
nzb.name = nzb_folder_name
sab.sendNZB(nzb)
elif headphones.BLACKHOLE:
nzb_name = nzb_folder_name + '.nzb'
download_path = os.path.join(headphones.BLACKHOLE_DIR, nzb_name)
try:
f = open(download_path, 'w')
f.write(data)
f.close()
logger.info('File saved to: %s' % nzb_name)
except Exception, e:
logger.error('Couldn\'t write NZB file: %s' % e)
break
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name])
return "found"
else:
return "none"
def verifyresult(title, artistterm, term):
title = re.sub('[\.\-\/\_]', ' ', title)
#if artistterm != 'Various Artists':
#
# if not re.search('^' + re.escape(artistterm), title, re.IGNORECASE):
# #logger.info("Removed from results: " + title + " (artist not at string start).")
# #return False
# elif re.search(re.escape(artistterm) + '\w', title, re.IGNORECASE | re.UNICODE):
# logger.info("Removed from results: " + title + " (post substring result).")
# return False
# elif re.search('\w' + re.escape(artistterm), title, re.IGNORECASE | re.UNICODE):
# logger.info("Removed from results: " + title + " (pre substring result).")
# return False
#another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II"
tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
for token in tokens:
if not token:
continue
if token == 'Various' or token == 'Artists' or token == 'VA':
continue
if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
cleantoken = ''.join(c for c in token if c not in string.punctuation)
if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
dic = {'!':'i', '$':'s'}
dumbtoken = helpers.replace_all(token, dic)
if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")")
return False
return True
def getresultNZB(result):
nzb = None
if result[3] == 'newzbin':
params = urllib.urlencode({"username": headphones.NEWZBIN_UID, "password": headphones.NEWZBIN_PASSWORD, "reportid": result[2]})
url = "https://www.newzbin2.es" + "/api/dnzb/"
urllib._urlopener = NewzbinDownloader()
try:
nzb = urllib.urlopen(url, data=params).read()
except urllib2.URLError, e:
logger.warn('Error fetching nzb from url: %s. Error: %s' % (url, e))
except exceptions.NewzbinAPIThrottled:
#TODO: This has created a potentially infinite loop? As long as they keep throttling we keep trying.
logger.info("Done waiting for Newzbin API throttle limit, starting downloads again")
getresultNZB(result)
except AttributeError:
logger.warn("AttributeError in getresultNZB.")
else:
try:
nzb = urllib2.urlopen(result[2], timeout=30).read()
except urllib2.URLError, e:
logger.warn('Error fetching nzb from url: ' + result[2] + ' %s' % e)
return nzb
def preprocess(resultlist):
if not headphones.USENET_RETENTION:
usenet_retention = 2000
else:
usenet_retention = int(headphones.USENET_RETENTION)
for result in resultlist:
nzb = getresultNZB(result)
if nzb:
try:
d = minidom.parseString(nzb)
node = d.documentElement
nzbfiles = d.getElementsByTagName("file")
skipping = False
for nzbfile in nzbfiles:
if int(nzbfile.getAttribute("date")) < (time.time() - usenet_retention * 86400):
logger.info('NZB contains a file out of your retention. Skipping.')
skipping = True
break
if skipping:
continue
#TODO: Do we want rar checking in here to try to keep unknowns out?
#or at least the option to do so?
except ExpatError:
logger.error('Unable to parse the best result NZB. Skipping.')
continue
return nzb, result
else:
logger.error("Couldn't retrieve the best nzb. Skipping.")
return (False, False)
def searchTorrent(albumid=None, new=False, losslessOnly=False):
myDB = db.DBConnection()
if albumid:
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE AlbumID=?', [albumid])
else:
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')
new = True
for albums in results:
albumid = albums[2]
reldate = albums[3]
try:
year = reldate[:4]
except TypeError:
year = ''
dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':''}
cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic))
cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic))
# FLAC usually doesn't have a year for some reason so I'll leave it out
# Various Artist albums might be listed as VA, so I'll leave that out too
# Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4:
term = cleanartist + ' ' + cleanalbum + ' ' + year
elif albums[0] == 'Various Artists':
term = cleanalbum + ' ' + year
else:
term = cleanartist + ' ' + cleanalbum
# Replace bad characters in the term and unicode it
term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8')
logger.info("Searching torrents for %s since it was marked as wanted" % term)
resultlist = []
minimumseeders = int(headphones.NUMBEROFSEEDERS) - 1
if headphones.KAT:
provider = "Kick Ass Torrent"
providerurl = url_fix("http://www.kat.ph/search/" + term)
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
categories = "7" #music
format = "2" #flac
maxsize = 10000000000
elif headphones.PREFERRED_QUALITY:
categories = "7" #music
format = "10" #mp3+flac
maxsize = 10000000000
else:
categories = "7" #music
format = "8" #mp3
maxsize = 300000000
params = {
"categories[0]": "music",
"field": "seeders",
"sorder": "desc",
"rss": "1"
}
searchURL = providerurl + "/?%s" % urllib.urlencode(params)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from %s: %s' % (provider, e))
data = False
if data:
d = feedparser.parse(data)
if not len(d.entries):
logger.info(u"No results found from %s for %s" % (provider, term))
pass
else:
for item in d.entries:
try:
rightformat = True
title = item.title
seeders = item.seeds
url = item.links[1]['url']
size = int(item.links[1]['length'])
try:
if format == "2":
request = urllib2.Request(url)
request.add_header('Accept-encoding', 'gzip')
response = urllib2.urlopen(request)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO( response.read())
f = gzip.GzipFile(fileobj=buf)
torrent = f.read()
else:
torrent = response.read()
if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1:
rightformat = False
except Exception, e:
rightformat = False
if rightformat == True and size < maxsize and minimumseeders < int(seeders):
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('%s is larger than the maxsize, the wrong format or has to little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeders), rightformat))
except Exception, e:
logger.error(u"An unknown error occured in the KAT parser: %s" % e)
if headphones.ISOHUNT:
provider = "ISOhunt"
providerurl = url_fix("http://isohunt.com/js/rss/" + term)
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
categories = "7" #music
format = "2" #flac
maxsize = 10000000000
elif headphones.PREFERRED_QUALITY:
categories = "7" #music
format = "10" #mp3+flac
maxsize = 10000000000
else:
categories = "7" #music
format = "8" #mp3
maxsize = 300000000
params = {
"iht": "2",
"sort": "seeds"
}
searchURL = providerurl + "?%s" % urllib.urlencode(params)
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from %s: %s' % (provider, e))
data = False
if data:
d = feedparser.parse(data)
if not len(d.entries):
logger.info(u"No results found from %s for %s" % (provider, term))
pass
else:
for item in d.entries:
try:
rightformat = True
title = re.sub(r"(?<= \[)(.+)(?=\])","",item.title)
title = title.replace("[]","")
sxstart = item.description.find("Seeds: ") + 7
seeds = ""
while item.description[sxstart:sxstart + 1] != " ":
seeds = seeds + item.description[sxstart:sxstart + 1]
sxstart = sxstart + 1
url = item.links[1]['url']
size = int(item.links[1]['length'])
try:
if format == "2":
request = urllib2.Request(url)
request.add_header('Accept-encoding', 'gzip')
response = urllib2.urlopen(request)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO( response.read())
f = gzip.GzipFile(fileobj=buf)
torrent = f.read()
else:
torrent = response.read()
if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1:
rightformat = False
except Exception, e:
rightformat = False
if rightformat == True and size < maxsize and minimumseeders < seeds:
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('%s is larger than the maxsize, the wrong format or has to little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat))
except Exception, e:
logger.error(u"An unknown error occured in the ISOhunt parser: %s" % e)
if headphones.MININOVA:
provider = "Mininova"
providerurl = url_fix("http://www.mininova.org/rss/" + term + "/5")
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
categories = "7" #music
format = "2" #flac
maxsize = 10000000000
elif headphones.PREFERRED_QUALITY:
categories = "7" #music
format = "10" #mp3+flac
maxsize = 10000000000
else:
categories = "7" #music
format = "8" #mp3
maxsize = 300000000
searchURL = providerurl
try:
data = urllib2.urlopen(searchURL, timeout=20).read()
except urllib2.URLError, e:
logger.warn('Error fetching data from %s: %s' % (provider, e))
data = False
if data:
d = feedparser.parse(data)
if not len(d.entries):
logger.info(u"No results found from %s for %s" % (provider, term))
pass
else:
for item in d.entries:
try:
rightformat = True
title = item.title
sxstart = item.description.find("Ratio: ") + 7
seeds = ""
while item.description[sxstart:sxstart + 1] != " ":
seeds = seeds + item.description[sxstart:sxstart + 1]
sxstart = sxstart + 1
url = item.links[1]['url']
size = int(item.links[1]['length'])
try:
if format == "2":
request = urllib2.Request(url)
request.add_header('Accept-encoding', 'gzip')
response = urllib2.urlopen(request)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO( response.read())
f = gzip.GzipFile(fileobj=buf)
torrent = f.read()
else:
torrent = response.read()
if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1:
rightformat = False
except Exception, e:
rightformat = False
if rightformat == True and size < maxsize and minimumseeders < seeds:
resultlist.append((title, size, url, provider))
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
else:
logger.info('%s is larger than the maxsize, the wrong format or has to little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat))
except Exception, e:
logger.error(u"An unknown error occured in the MiniNova Parser: %s" % e)
#attempt to verify that this isn't a substring result
#when looking for "Foo - Foo" we don't want "Foobar"
#this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
if len(resultlist):
resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term)]
if len(resultlist):
if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE)
tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid])
try:
albumlength = sum([pair[0] for pair in tracks])
targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128
logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize))
newlist = []
for result in resultlist:
delta = abs(targetsize - result[1])
newlist.append((result[0], result[1], result[2], result[3], delta))
torrentlist = sorted(newlist, key=lambda title: title[4])
except Exception, e:
logger.debug('Error: %s' % str(e))
logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1]))
torrentlist = sorted(resultlist, key=lambda title: title[1], reverse=True)
else:
torrentlist = sorted(resultlist, key=lambda title: title[1], reverse=True)
if new:
while True:
if len(torrentlist):
alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [torrentlist[0][2]])
if len(alreadydownloaded):
logger.info('%s has already been downloaded. Skipping.' % torrentlist[0][0])
torrentlist.pop(0)
else:
break
else:
logger.info('No more results found for %s' % term)
return
logger.info(u"Pre-processing result")
(data, bestqual) = preprocesstorrent(torrentlist)
if data and bestqual:
logger.info(u'Found best result: <a href="%s">%s</a> - %s' % (bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])))
torrent_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year)
if headphones.TORRENTBLACKHOLE_DIR == "sendtracker":
torrent = classes.TorrentDataSearchResult()
torrent.extraInfo.append(data)
torrent.name = torrent_folder_name
sab.sendTorrent(torrent)
elif headphones.TORRENTBLACKHOLE_DIR != "":
torrent_name = torrent_folder_name + '.torrent'
download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
try:
f = open(download_path, 'wb')
f.write(data)
f.close()
logger.info('File saved to: %s' % torrent_name)
except Exception, e:
logger.error('Couldn\'t write Torrent file: %s' % e)
break
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name])
def preprocesstorrent(resultlist):
selresult = ""
for result in resultlist:
try:
if selresult == "":
selresult = result
request = urllib2.Request(result[2])
request.add_header('Accept-encoding', 'gzip')
response = urllib2.urlopen(request)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO( response.read())
f = gzip.GzipFile(fileobj=buf)
torrent = f.read()
else:
torrent = response.read()
elif int(selresult[1]) < int(result[1]):
selresult = result
request = urllib2.Request(result[2])
request.add_header('Accept-encoding', 'gzip')
response = urllib2.urlopen(request)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO( response.read())
f = gzip.GzipFile(fileobj=buf)
torrent = f.read()
else:
torrent = response.read()
except ExpatError:
logger.error('Unable to torrent file. Skipping.')
continue
return torrent, selresult