mirror of
https://github.com/rembo10/headphones.git
synced 2026-05-02 01:39:29 +01:00
940 lines
42 KiB
Python
940 lines
42 KiB
Python
# This file is part of Headphones.
|
|
#
|
|
# Headphones is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Headphones is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import urllib, urllib2, urlparse
|
|
import lib.feedparser as feedparser
|
|
from xml.dom import minidom
|
|
from xml.parsers.expat import ExpatError
|
|
from StringIO import StringIO
|
|
import gzip
|
|
|
|
import os, re, time
|
|
import string
|
|
|
|
import headphones, exceptions
|
|
from headphones import logger, db, helpers, classes, sab
|
|
|
|
class NewzbinDownloader(urllib.FancyURLopener):
|
|
|
|
def __init__(self):
|
|
urllib.FancyURLopener.__init__(self)
|
|
|
|
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
|
|
|
# if newzbin is throttling us, wait seconds and try again
|
|
if errcode == 400:
|
|
|
|
newzbinErrCode = int(headers.getheader('X-DNZB-RCode'))
|
|
|
|
if newzbinErrCode == 450:
|
|
rtext = str(headers.getheader('X-DNZB-RText'))
|
|
result = re.search("wait (\d+) seconds", rtext)
|
|
|
|
logger.info("Newzbin throttled our NZB downloading, pausing for " + result.group(1) + " seconds")
|
|
time.sleep(int(result.group(1)))
|
|
raise exceptions.NewzbinAPIThrottled()
|
|
|
|
elif newzbinErrCode == 401:
|
|
logger.info("Newzbin error 401")
|
|
#raise exceptions.AuthException("Newzbin username or password incorrect")
|
|
|
|
elif newzbinErrCode == 402:
|
|
#raise exceptions.AuthException("Newzbin account not premium status, can't download NZBs")
|
|
logger.info("Newzbin error 402")
|
|
|
|
#this should be in a class somewhere
|
|
def getNewzbinURL(url):
|
|
|
|
myOpener = classes.AuthURLOpener(headphones.NEWZBIN_UID, headphones.NEWZBIN_PASSWORD)
|
|
try:
|
|
f = myOpener.openit(url)
|
|
except (urllib.ContentTooShortError, IOError), e:
|
|
logger.info("Error loading search results: ContentTooShortError ")
|
|
return None
|
|
|
|
data = f.read()
|
|
f.close()
|
|
|
|
return data
|
|
|
|
def url_fix(s, charset='utf-8'):
|
|
if isinstance(s, unicode):
|
|
s = s.encode(charset, 'ignore')
|
|
scheme, netloc, path, qs, anchor = urlparse.urlsplit(s)
|
|
path = urllib.quote(path, '/%')
|
|
qs = urllib.quote_plus(qs, ':&=')
|
|
return urlparse.urlunsplit((scheme, netloc, path, qs, anchor))
|
|
|
|
|
|
def searchforalbum(albumid=None, new=False, lossless=False):
|
|
|
|
if not albumid:
|
|
|
|
myDB = db.DBConnection()
|
|
|
|
results = myDB.select('SELECT AlbumID, Status from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')
|
|
new = True
|
|
|
|
for result in results:
|
|
foundNZB = "none"
|
|
if (headphones.NZBMATRIX or headphones.NEWZNAB or headphones.NZBSORG or headphones.NEWZBIN) and (headphones.SAB_HOST or headphones.BLACKHOLE):
|
|
if result['Status'] == "Wanted Lossless":
|
|
foundNZB = searchNZB(result['AlbumID'], new, losslessOnly=True)
|
|
else:
|
|
foundNZB = searchNZB(result['AlbumID'], new)
|
|
|
|
if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA) and foundNZB == "none":
|
|
if result['Status'] == "Wanted Lossless":
|
|
searchTorrent(result['AlbumID'], new, losslessOnly=True)
|
|
else:
|
|
searchTorrent(result['AlbumID'], new)
|
|
|
|
else:
|
|
|
|
foundNZB = "none"
|
|
if (headphones.NZBMATRIX or headphones.NEWZNAB or headphones.NZBSORG or headphones.NEWZBIN) and (headphones.SAB_HOST or headphones.BLACKHOLE):
|
|
foundNZB = searchNZB(albumid, new, lossless)
|
|
|
|
if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA) and foundNZB == "none":
|
|
searchTorrent(albumid, new, lossless)
|
|
|
|
def searchNZB(albumid=None, new=False, losslessOnly=False):
|
|
|
|
myDB = db.DBConnection()
|
|
|
|
if albumid:
|
|
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE AlbumID=?', [albumid])
|
|
else:
|
|
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')
|
|
new = True
|
|
|
|
for albums in results:
|
|
|
|
albumid = albums[2]
|
|
reldate = albums[3]
|
|
|
|
try:
|
|
year = reldate[:4]
|
|
except TypeError:
|
|
year = ''
|
|
|
|
dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '.':'', ':':''}
|
|
|
|
cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic))
|
|
cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic))
|
|
|
|
# FLAC usually doesn't have a year for some reason so I'll leave it out
|
|
# Various Artist albums might be listed as VA, so I'll leave that out too
|
|
# Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
|
|
if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4:
|
|
term = cleanartist + ' ' + cleanalbum + ' ' + year
|
|
elif albums[0] == 'Various Artists':
|
|
term = cleanalbum + ' ' + year
|
|
else:
|
|
term = cleanartist + ' ' + cleanalbum
|
|
|
|
# Replace bad characters in the term and unicode it
|
|
term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
|
|
artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8')
|
|
|
|
logger.info("Searching for %s since it was marked as wanted" % term)
|
|
|
|
resultlist = []
|
|
|
|
if headphones.NZBMATRIX:
|
|
provider = "nzbmatrix"
|
|
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
|
|
categories = "23"
|
|
elif headphones.PREFERRED_QUALITY:
|
|
categories = "23,22"
|
|
else:
|
|
categories = "22"
|
|
|
|
# For some reason NZBMatrix is erroring out/timing out when the term starts with a "The" right now
|
|
# so we'll strip it out for the time being. This may get fixed on their end, it may not, but
|
|
# hopefully this will fix it for now. If you notice anything else it gets stuck on, please post it
|
|
# on Github so it can be added
|
|
if term.lower().startswith("the "):
|
|
term = term[4:]
|
|
|
|
|
|
params = { "page": "download",
|
|
"username": headphones.NZBMATRIX_USERNAME,
|
|
"apikey": headphones.NZBMATRIX_APIKEY,
|
|
"subcat": categories,
|
|
"maxage": headphones.USENET_RETENTION,
|
|
"english": 1,
|
|
"ssl": 1,
|
|
"scenename": 1,
|
|
"term": term
|
|
}
|
|
|
|
searchURL = "http://rss.nzbmatrix.com/rss.php?" + urllib.urlencode(params)
|
|
logger.info(u'Parsing results from <a href="%s">NZBMatrix</a>' % searchURL)
|
|
try:
|
|
data = urllib2.urlopen(searchURL, timeout=20).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching data from NZBMatrix: %s' % e)
|
|
data = False
|
|
|
|
if data:
|
|
|
|
d = feedparser.parse(data)
|
|
|
|
for item in d.entries:
|
|
try:
|
|
url = item.link
|
|
title = item.title
|
|
size = int(item.links[1]['length'])
|
|
|
|
resultlist.append((title, size, url, provider))
|
|
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
|
|
|
|
except AttributeError, e:
|
|
logger.info(u"No results found from NZBMatrix for %s" % term)
|
|
|
|
if headphones.NEWZNAB:
|
|
provider = "newznab"
|
|
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
|
|
categories = "3040"
|
|
elif headphones.PREFERRED_QUALITY:
|
|
categories = "3040,3010"
|
|
else:
|
|
categories = "3010"
|
|
|
|
params = { "t": "search",
|
|
"apikey": headphones.NEWZNAB_APIKEY,
|
|
"cat": categories,
|
|
"maxage": headphones.USENET_RETENTION,
|
|
"q": term
|
|
}
|
|
|
|
searchURL = headphones.NEWZNAB_HOST + '/api?' + urllib.urlencode(params)
|
|
|
|
logger.info(u'Parsing results from <a href="%s">%s</a>' % (searchURL, headphones.NEWZNAB_HOST))
|
|
|
|
try:
|
|
data = urllib2.urlopen(searchURL, timeout=20).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching data from %s: %s' % (headphones.NEWZNAB_HOST, e))
|
|
data = False
|
|
|
|
if data:
|
|
|
|
d = feedparser.parse(data)
|
|
|
|
if not len(d.entries):
|
|
logger.info(u"No results found from %s for %s" % (headphones.NEWZNAB_HOST, term))
|
|
pass
|
|
|
|
else:
|
|
for item in d.entries:
|
|
try:
|
|
url = item.link
|
|
title = item.title
|
|
size = int(item.links[1]['length'])
|
|
|
|
resultlist.append((title, size, url, provider))
|
|
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
|
|
|
|
except Exception, e:
|
|
logger.error(u"An unknown error occured trying to parse the feed: %s" % e)
|
|
|
|
if headphones.NZBSORG:
|
|
provider = "nzbsorg"
|
|
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
|
|
categories = "3040"
|
|
elif headphones.PREFERRED_QUALITY:
|
|
categories = "3040,3010"
|
|
else:
|
|
categories = "3010"
|
|
|
|
params = { "t": "search",
|
|
"apikey": headphones.NZBSORG_HASH,
|
|
"cat": categories,
|
|
"maxage": headphones.USENET_RETENTION,
|
|
"q": term
|
|
}
|
|
|
|
searchURL = 'http://beta.nzbs.org/api?' + urllib.urlencode(params)
|
|
|
|
logger.info(u'Parsing results from <a href="%s">nzbs.org</a>' % searchURL)
|
|
|
|
try:
|
|
data = urllib2.urlopen(searchURL, timeout=20).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching data from nzbs.org: %s' % e)
|
|
data = False
|
|
|
|
if data:
|
|
|
|
d = feedparser.parse(data)
|
|
|
|
if not len(d.entries):
|
|
logger.info(u"No results found from nzbs.org for %s" % term)
|
|
pass
|
|
|
|
else:
|
|
for item in d.entries:
|
|
try:
|
|
url = item.link
|
|
title = item.title
|
|
size = int(item.links[1]['length'])
|
|
|
|
resultlist.append((title, size, url, provider))
|
|
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
|
|
|
|
except Exception, e:
|
|
logger.error(u"An unknown error occured trying to parse the feed: %s" % e)
|
|
|
|
if headphones.NEWZBIN:
|
|
provider = "newzbin"
|
|
providerurl = "https://www.newzbin2.es/"
|
|
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
|
|
categories = "7" #music
|
|
format = "2" #flac
|
|
elif headphones.PREFERRED_QUALITY:
|
|
categories = "7" #music
|
|
format = "10" #mp3+flac
|
|
else:
|
|
categories = "7" #music
|
|
format = "8" #mp3
|
|
|
|
params = {
|
|
"fpn": "p",
|
|
'u_nfo_posts_only': 0,
|
|
'u_url_posts_only': 0,
|
|
'u_comment_posts_only': 0,
|
|
'u_show_passworded': 0,
|
|
"searchaction": "Search",
|
|
#"dl": 1,
|
|
"category": categories,
|
|
"retention": headphones.USENET_RETENTION,
|
|
"ps_rb_audio_format": format,
|
|
"feed": "rss",
|
|
"u_post_results_amt": 50, #this can default to a high number per user
|
|
"hauth": 1,
|
|
"q": term
|
|
}
|
|
searchURL = providerurl + "search/?%s" % urllib.urlencode(params)
|
|
try:
|
|
data = getNewzbinURL(searchURL)
|
|
except exceptions.NewzbinAPIThrottled:
|
|
#try again if we were throttled
|
|
data = getNewzbinURL(searchURL)
|
|
if data:
|
|
logger.info(u'Parsing results from <a href="%s">%s</a>' % (searchURL, providerurl))
|
|
|
|
try:
|
|
d = minidom.parseString(data)
|
|
node = d.documentElement
|
|
items = d.getElementsByTagName("item")
|
|
except ExpatError:
|
|
logger.info('Unable to get the NEWZBIN feed. Check that your settings are correct - post a bug if they are')
|
|
items = []
|
|
|
|
if len(items):
|
|
|
|
for item in items:
|
|
|
|
sizenode = item.getElementsByTagName("report:size")[0].childNodes
|
|
titlenode = item.getElementsByTagName("title")[0].childNodes
|
|
linknode = item.getElementsByTagName("link")[0].childNodes
|
|
|
|
for node in sizenode:
|
|
size = int(node.data)
|
|
for node in titlenode:
|
|
title = node.data
|
|
for node in linknode:
|
|
url = node.data
|
|
|
|
#exract the reportid from the link nodes
|
|
id_regex = re.escape(providerurl) + 'browse/post/(\d+)/'
|
|
id_match = re.match(id_regex, url)
|
|
if not id_match:
|
|
logger.info("Didn't find a valid Newzbin reportid in linknode")
|
|
else:
|
|
url = id_match.group(1) #we have to make a post request later, need the id
|
|
if url:
|
|
resultlist.append((title, size, url, provider))
|
|
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
|
|
else:
|
|
logger.info('No url link found in nzb. Skipping.')
|
|
|
|
else:
|
|
logger.info('No results found from NEWZBIN for %s' % term)
|
|
|
|
#attempt to verify that this isn't a substring result
|
|
#when looking for "Foo - Foo" we don't want "Foobar"
|
|
#this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
|
|
if len(resultlist):
|
|
resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term)]
|
|
|
|
if len(resultlist):
|
|
|
|
if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
|
|
|
|
logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE)
|
|
|
|
tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid])
|
|
|
|
try:
|
|
albumlength = sum([pair[0] for pair in tracks])
|
|
|
|
targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128
|
|
logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize))
|
|
|
|
newlist = []
|
|
|
|
for result in resultlist:
|
|
delta = abs(targetsize - result[1])
|
|
newlist.append((result[0], result[1], result[2], result[3], delta))
|
|
|
|
nzblist = sorted(newlist, key=lambda title: title[4])
|
|
|
|
except Exception, e:
|
|
|
|
logger.debug('Error: %s' % str(e))
|
|
logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1]))
|
|
|
|
nzblist = sorted(resultlist, key=lambda title: title[1], reverse=True)
|
|
|
|
else:
|
|
|
|
nzblist = sorted(resultlist, key=lambda title: title[1], reverse=True)
|
|
|
|
|
|
if new:
|
|
|
|
while True:
|
|
|
|
if len(nzblist):
|
|
|
|
alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [nzblist[0][2]])
|
|
|
|
if len(alreadydownloaded):
|
|
logger.info('%s has already been downloaded. Skipping.' % nzblist[0][0])
|
|
nzblist.pop(0)
|
|
|
|
else:
|
|
break
|
|
else:
|
|
logger.info('No more results found for %s' % term)
|
|
return "none"
|
|
|
|
logger.info(u"Pre-processing result")
|
|
|
|
(data, bestqual) = preprocess(nzblist)
|
|
|
|
if data and bestqual:
|
|
logger.info(u'Found best result: <a href="%s">%s</a> - %s' % (bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])))
|
|
nzb_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year)
|
|
if headphones.SAB_HOST and not headphones.BLACKHOLE:
|
|
|
|
nzb = classes.NZBDataSearchResult()
|
|
nzb.extraInfo.append(data)
|
|
nzb.name = nzb_folder_name
|
|
sab.sendNZB(nzb)
|
|
|
|
elif headphones.BLACKHOLE:
|
|
|
|
nzb_name = nzb_folder_name + '.nzb'
|
|
download_path = os.path.join(headphones.BLACKHOLE_DIR, nzb_name)
|
|
try:
|
|
f = open(download_path, 'w')
|
|
f.write(data)
|
|
f.close()
|
|
logger.info('File saved to: %s' % nzb_name)
|
|
except Exception, e:
|
|
logger.error('Couldn\'t write NZB file: %s' % e)
|
|
break
|
|
|
|
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
|
|
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name])
|
|
return "found"
|
|
else:
|
|
return "none"
|
|
|
|
|
|
|
|
def verifyresult(title, artistterm, term):
|
|
|
|
title = re.sub('[\.\-\/\_]', ' ', title)
|
|
|
|
#if artistterm != 'Various Artists':
|
|
#
|
|
# if not re.search('^' + re.escape(artistterm), title, re.IGNORECASE):
|
|
# #logger.info("Removed from results: " + title + " (artist not at string start).")
|
|
# #return False
|
|
# elif re.search(re.escape(artistterm) + '\w', title, re.IGNORECASE | re.UNICODE):
|
|
# logger.info("Removed from results: " + title + " (post substring result).")
|
|
# return False
|
|
# elif re.search('\w' + re.escape(artistterm), title, re.IGNORECASE | re.UNICODE):
|
|
# logger.info("Removed from results: " + title + " (pre substring result).")
|
|
# return False
|
|
|
|
#another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II"
|
|
|
|
tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
|
|
for token in tokens:
|
|
|
|
if not token:
|
|
continue
|
|
if token == 'Various' or token == 'Artists' or token == 'VA':
|
|
continue
|
|
if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
|
|
cleantoken = ''.join(c for c in token if c not in string.punctuation)
|
|
if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
|
|
dic = {'!':'i', '$':'s'}
|
|
dumbtoken = helpers.replace_all(token, dic)
|
|
if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
|
|
logger.info("Removed from results: " + title + " (missing tokens: " + token + " and " + cleantoken + ")")
|
|
return False
|
|
|
|
return True
|
|
|
|
def getresultNZB(result):
|
|
|
|
nzb = None
|
|
|
|
if result[3] == 'newzbin':
|
|
params = urllib.urlencode({"username": headphones.NEWZBIN_UID, "password": headphones.NEWZBIN_PASSWORD, "reportid": result[2]})
|
|
url = "https://www.newzbin2.es" + "/api/dnzb/"
|
|
urllib._urlopener = NewzbinDownloader()
|
|
try:
|
|
nzb = urllib.urlopen(url, data=params).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching nzb from url: %s. Error: %s' % (url, e))
|
|
except exceptions.NewzbinAPIThrottled:
|
|
#TODO: This has created a potentially infinite loop? As long as they keep throttling we keep trying.
|
|
logger.info("Done waiting for Newzbin API throttle limit, starting downloads again")
|
|
getresultNZB(result)
|
|
except AttributeError:
|
|
logger.warn("AttributeError in getresultNZB.")
|
|
else:
|
|
try:
|
|
nzb = urllib2.urlopen(result[2], timeout=30).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching nzb from url: ' + result[2] + ' %s' % e)
|
|
return nzb
|
|
|
|
def preprocess(resultlist):
|
|
|
|
if not headphones.USENET_RETENTION:
|
|
usenet_retention = 2000
|
|
else:
|
|
usenet_retention = int(headphones.USENET_RETENTION)
|
|
|
|
for result in resultlist:
|
|
nzb = getresultNZB(result)
|
|
if nzb:
|
|
try:
|
|
d = minidom.parseString(nzb)
|
|
node = d.documentElement
|
|
nzbfiles = d.getElementsByTagName("file")
|
|
skipping = False
|
|
for nzbfile in nzbfiles:
|
|
if int(nzbfile.getAttribute("date")) < (time.time() - usenet_retention * 86400):
|
|
logger.info('NZB contains a file out of your retention. Skipping.')
|
|
skipping = True
|
|
break
|
|
if skipping:
|
|
continue
|
|
|
|
#TODO: Do we want rar checking in here to try to keep unknowns out?
|
|
#or at least the option to do so?
|
|
except ExpatError:
|
|
logger.error('Unable to parse the best result NZB. Skipping.')
|
|
continue
|
|
return nzb, result
|
|
else:
|
|
logger.error("Couldn't retrieve the best nzb. Skipping.")
|
|
return (False, False)
|
|
|
|
|
|
|
|
def searchTorrent(albumid=None, new=False, losslessOnly=False):
|
|
|
|
myDB = db.DBConnection()
|
|
|
|
if albumid:
|
|
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE AlbumID=?', [albumid])
|
|
else:
|
|
results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')
|
|
new = True
|
|
|
|
for albums in results:
|
|
|
|
albumid = albums[2]
|
|
reldate = albums[3]
|
|
|
|
try:
|
|
year = reldate[:4]
|
|
except TypeError:
|
|
year = ''
|
|
|
|
dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':''}
|
|
|
|
cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic))
|
|
cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic))
|
|
|
|
# FLAC usually doesn't have a year for some reason so I'll leave it out
|
|
# Various Artist albums might be listed as VA, so I'll leave that out too
|
|
# Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
|
|
if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4:
|
|
term = cleanartist + ' ' + cleanalbum + ' ' + year
|
|
elif albums[0] == 'Various Artists':
|
|
term = cleanalbum + ' ' + year
|
|
else:
|
|
term = cleanartist + ' ' + cleanalbum
|
|
|
|
# Replace bad characters in the term and unicode it
|
|
term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
|
|
artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8')
|
|
|
|
logger.info("Searching torrents for %s since it was marked as wanted" % term)
|
|
|
|
resultlist = []
|
|
minimumseeders = int(headphones.NUMBEROFSEEDERS) - 1
|
|
|
|
if headphones.KAT:
|
|
provider = "Kick Ass Torrent"
|
|
providerurl = url_fix("http://www.kat.ph/search/" + term)
|
|
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
|
|
categories = "7" #music
|
|
format = "2" #flac
|
|
maxsize = 10000000000
|
|
elif headphones.PREFERRED_QUALITY:
|
|
categories = "7" #music
|
|
format = "10" #mp3+flac
|
|
maxsize = 10000000000
|
|
else:
|
|
categories = "7" #music
|
|
format = "8" #mp3
|
|
maxsize = 300000000
|
|
|
|
params = {
|
|
"categories[0]": "music",
|
|
"field": "seeders",
|
|
"sorder": "desc",
|
|
"rss": "1"
|
|
}
|
|
searchURL = providerurl + "/?%s" % urllib.urlencode(params)
|
|
|
|
try:
|
|
data = urllib2.urlopen(searchURL, timeout=20).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching data from %s: %s' % (provider, e))
|
|
data = False
|
|
|
|
if data:
|
|
|
|
d = feedparser.parse(data)
|
|
if not len(d.entries):
|
|
logger.info(u"No results found from %s for %s" % (provider, term))
|
|
pass
|
|
|
|
else:
|
|
for item in d.entries:
|
|
try:
|
|
rightformat = True
|
|
title = item.title
|
|
seeders = item.seeds
|
|
url = item.links[1]['url']
|
|
size = int(item.links[1]['length'])
|
|
try:
|
|
if format == "2":
|
|
request = urllib2.Request(url)
|
|
request.add_header('Accept-encoding', 'gzip')
|
|
response = urllib2.urlopen(request)
|
|
if response.info().get('Content-Encoding') == 'gzip':
|
|
buf = StringIO( response.read())
|
|
f = gzip.GzipFile(fileobj=buf)
|
|
torrent = f.read()
|
|
else:
|
|
torrent = response.read()
|
|
if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1:
|
|
rightformat = False
|
|
except Exception, e:
|
|
rightformat = False
|
|
if rightformat == True and size < maxsize and minimumseeders < int(seeders):
|
|
resultlist.append((title, size, url, provider))
|
|
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
|
|
else:
|
|
logger.info('%s is larger than the maxsize, the wrong format or has to little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeders), rightformat))
|
|
|
|
except Exception, e:
|
|
logger.error(u"An unknown error occured in the KAT parser: %s" % e)
|
|
|
|
if headphones.ISOHUNT:
|
|
provider = "ISOhunt"
|
|
providerurl = url_fix("http://isohunt.com/js/rss/" + term)
|
|
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
|
|
categories = "7" #music
|
|
format = "2" #flac
|
|
maxsize = 10000000000
|
|
elif headphones.PREFERRED_QUALITY:
|
|
categories = "7" #music
|
|
format = "10" #mp3+flac
|
|
maxsize = 10000000000
|
|
else:
|
|
categories = "7" #music
|
|
format = "8" #mp3
|
|
maxsize = 300000000
|
|
|
|
params = {
|
|
"iht": "2",
|
|
"sort": "seeds"
|
|
}
|
|
searchURL = providerurl + "?%s" % urllib.urlencode(params)
|
|
|
|
try:
|
|
data = urllib2.urlopen(searchURL, timeout=20).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching data from %s: %s' % (provider, e))
|
|
data = False
|
|
|
|
if data:
|
|
|
|
d = feedparser.parse(data)
|
|
if not len(d.entries):
|
|
logger.info(u"No results found from %s for %s" % (provider, term))
|
|
pass
|
|
|
|
else:
|
|
for item in d.entries:
|
|
try:
|
|
rightformat = True
|
|
title = re.sub(r"(?<= \[)(.+)(?=\])","",item.title)
|
|
title = title.replace("[]","")
|
|
sxstart = item.description.find("Seeds: ") + 7
|
|
seeds = ""
|
|
while item.description[sxstart:sxstart + 1] != " ":
|
|
seeds = seeds + item.description[sxstart:sxstart + 1]
|
|
sxstart = sxstart + 1
|
|
url = item.links[1]['url']
|
|
size = int(item.links[1]['length'])
|
|
try:
|
|
if format == "2":
|
|
request = urllib2.Request(url)
|
|
request.add_header('Accept-encoding', 'gzip')
|
|
response = urllib2.urlopen(request)
|
|
if response.info().get('Content-Encoding') == 'gzip':
|
|
buf = StringIO( response.read())
|
|
f = gzip.GzipFile(fileobj=buf)
|
|
torrent = f.read()
|
|
else:
|
|
torrent = response.read()
|
|
if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1:
|
|
rightformat = False
|
|
except Exception, e:
|
|
rightformat = False
|
|
if rightformat == True and size < maxsize and minimumseeders < seeds:
|
|
resultlist.append((title, size, url, provider))
|
|
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
|
|
else:
|
|
logger.info('%s is larger than the maxsize, the wrong format or has to little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat))
|
|
|
|
except Exception, e:
|
|
logger.error(u"An unknown error occured in the ISOhunt parser: %s" % e)
|
|
|
|
if headphones.MININOVA:
|
|
provider = "Mininova"
|
|
providerurl = url_fix("http://www.mininova.org/rss/" + term + "/5")
|
|
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
|
|
categories = "7" #music
|
|
format = "2" #flac
|
|
maxsize = 10000000000
|
|
elif headphones.PREFERRED_QUALITY:
|
|
categories = "7" #music
|
|
format = "10" #mp3+flac
|
|
maxsize = 10000000000
|
|
else:
|
|
categories = "7" #music
|
|
format = "8" #mp3
|
|
maxsize = 300000000
|
|
|
|
searchURL = providerurl
|
|
|
|
try:
|
|
data = urllib2.urlopen(searchURL, timeout=20).read()
|
|
except urllib2.URLError, e:
|
|
logger.warn('Error fetching data from %s: %s' % (provider, e))
|
|
data = False
|
|
|
|
if data:
|
|
|
|
d = feedparser.parse(data)
|
|
if not len(d.entries):
|
|
logger.info(u"No results found from %s for %s" % (provider, term))
|
|
pass
|
|
|
|
else:
|
|
for item in d.entries:
|
|
try:
|
|
rightformat = True
|
|
title = item.title
|
|
sxstart = item.description.find("Ratio: ") + 7
|
|
seeds = ""
|
|
while item.description[sxstart:sxstart + 1] != " ":
|
|
seeds = seeds + item.description[sxstart:sxstart + 1]
|
|
sxstart = sxstart + 1
|
|
url = item.links[1]['url']
|
|
size = int(item.links[1]['length'])
|
|
try:
|
|
if format == "2":
|
|
request = urllib2.Request(url)
|
|
request.add_header('Accept-encoding', 'gzip')
|
|
response = urllib2.urlopen(request)
|
|
if response.info().get('Content-Encoding') == 'gzip':
|
|
buf = StringIO( response.read())
|
|
f = gzip.GzipFile(fileobj=buf)
|
|
torrent = f.read()
|
|
else:
|
|
torrent = response.read()
|
|
if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1:
|
|
rightformat = False
|
|
except Exception, e:
|
|
rightformat = False
|
|
if rightformat == True and size < maxsize and minimumseeders < seeds:
|
|
resultlist.append((title, size, url, provider))
|
|
logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
|
|
else:
|
|
logger.info('%s is larger than the maxsize, the wrong format or has to little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat))
|
|
|
|
except Exception, e:
|
|
logger.error(u"An unknown error occured in the MiniNova Parser: %s" % e)
|
|
|
|
|
|
|
|
#attempt to verify that this isn't a substring result
|
|
#when looking for "Foo - Foo" we don't want "Foobar"
|
|
#this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
|
|
if len(resultlist):
|
|
resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term)]
|
|
|
|
if len(resultlist):
|
|
|
|
if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
|
|
|
|
logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE)
|
|
|
|
tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid])
|
|
|
|
try:
|
|
albumlength = sum([pair[0] for pair in tracks])
|
|
|
|
targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128
|
|
logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize))
|
|
|
|
newlist = []
|
|
|
|
for result in resultlist:
|
|
delta = abs(targetsize - result[1])
|
|
newlist.append((result[0], result[1], result[2], result[3], delta))
|
|
|
|
torrentlist = sorted(newlist, key=lambda title: title[4])
|
|
|
|
except Exception, e:
|
|
|
|
logger.debug('Error: %s' % str(e))
|
|
logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1]))
|
|
|
|
torrentlist = sorted(resultlist, key=lambda title: title[1], reverse=True)
|
|
|
|
else:
|
|
|
|
torrentlist = sorted(resultlist, key=lambda title: title[1], reverse=True)
|
|
|
|
|
|
if new:
|
|
|
|
while True:
|
|
|
|
if len(torrentlist):
|
|
|
|
alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [torrentlist[0][2]])
|
|
|
|
if len(alreadydownloaded):
|
|
logger.info('%s has already been downloaded. Skipping.' % torrentlist[0][0])
|
|
torrentlist.pop(0)
|
|
|
|
else:
|
|
break
|
|
else:
|
|
logger.info('No more results found for %s' % term)
|
|
return
|
|
|
|
logger.info(u"Pre-processing result")
|
|
|
|
(data, bestqual) = preprocesstorrent(torrentlist)
|
|
|
|
if data and bestqual:
|
|
logger.info(u'Found best result: <a href="%s">%s</a> - %s' % (bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])))
|
|
torrent_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year)
|
|
if headphones.TORRENTBLACKHOLE_DIR == "sendtracker":
|
|
|
|
torrent = classes.TorrentDataSearchResult()
|
|
torrent.extraInfo.append(data)
|
|
torrent.name = torrent_folder_name
|
|
sab.sendTorrent(torrent)
|
|
|
|
elif headphones.TORRENTBLACKHOLE_DIR != "":
|
|
|
|
torrent_name = torrent_folder_name + '.torrent'
|
|
download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
|
|
try:
|
|
f = open(download_path, 'wb')
|
|
f.write(data)
|
|
f.close()
|
|
logger.info('File saved to: %s' % torrent_name)
|
|
except Exception, e:
|
|
logger.error('Couldn\'t write Torrent file: %s' % e)
|
|
break
|
|
|
|
myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
|
|
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name])
|
|
|
|
def preprocesstorrent(resultlist):
|
|
selresult = ""
|
|
for result in resultlist:
|
|
try:
|
|
if selresult == "":
|
|
selresult = result
|
|
request = urllib2.Request(result[2])
|
|
request.add_header('Accept-encoding', 'gzip')
|
|
response = urllib2.urlopen(request)
|
|
if response.info().get('Content-Encoding') == 'gzip':
|
|
buf = StringIO( response.read())
|
|
f = gzip.GzipFile(fileobj=buf)
|
|
torrent = f.read()
|
|
else:
|
|
torrent = response.read()
|
|
elif int(selresult[1]) < int(result[1]):
|
|
selresult = result
|
|
request = urllib2.Request(result[2])
|
|
request.add_header('Accept-encoding', 'gzip')
|
|
response = urllib2.urlopen(request)
|
|
if response.info().get('Content-Encoding') == 'gzip':
|
|
buf = StringIO( response.read())
|
|
f = gzip.GzipFile(fileobj=buf)
|
|
torrent = f.read()
|
|
else:
|
|
torrent = response.read()
|
|
except ExpatError:
|
|
logger.error('Unable to torrent file. Skipping.')
|
|
continue
|
|
return torrent, selresult
|