From 6279a95b6c1104dc55d8ef1753d1413374105cd0 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Thu, 3 Apr 2014 19:29:12 -0700 Subject: [PATCH] Heavily modified searcher.py in order to allow searching for torrents over nzbs & combining results to pick the best of both --- headphones/searcher.py | 2459 +++++++++++++++++++--------------------- 1 file changed, 1187 insertions(+), 1272 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index 89214b81..cd03cf48 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -108,566 +108,659 @@ def patch_http_response_read(func): httplib.HTTPResponse.read = patch_http_response_read(httplib.HTTPResponse.read) -def searchforalbum(albumid=None, new=False, lossless=False): +def searchforalbum(albumid=None, new=False, losslessOnly=False): + myDB = db.DBConnection() + if not albumid: - myDB = db.DBConnection() - - results = myDB.select('SELECT AlbumID, AlbumTitle, ArtistName, Status from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - new = True + results = myDB.select('SELECT * from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - for result in results: - foundNZB = "none" - if not result['AlbumTitle'] or not result['ArtistName']: - logger.warn('Skipping release %s. No title available' % result['AlbumID']) - else: - if (headphones.HEADPHONES_INDEXER or headphones.NEWZNAB or headphones.NZBSORG or headphones.NZBSRUS or headphones.OMGWTFNZBS) and (headphones.SAB_HOST or headphones.BLACKHOLE_DIR or headphones.NZBGET_HOST): - if result['Status'] == "Wanted Lossless": - foundNZB = searchNZB(result['AlbumID'], new, losslessOnly=True) - else: - foundNZB = searchNZB(result['AlbumID'], new) - - if (headphones.KAT or headphones.PIRATEBAY or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER or headphones.WHATCD) and foundNZB == "none": - - if result['Status'] == "Wanted Lossless": - searchTorrent(result['AlbumID'], new, losslessOnly=True) - else: - searchTorrent(result['AlbumID'], new) + for album in results: + + if not album['AlbumTitle'] or not album['ArtistName']: + logger.warn('Skipping release %s. No title available' % album['AlbumID']) + continue + + new = True + + if album['Status'] == "Wanted Lossless": + losslessOnly = True + + do_sorted_search(album, new, losslessOnly) + + else: + logger.info("Got to zero") + album = myDB.action('SELECT * from albums WHERE AlbumID=?', [albumid]).fetchone() + logger.info('Searching for %s' % album['AlbumTitle']) + do_sorted_search(album, new, losslessOnly) + + logger.info('Search for Wanted albums complete') + +def do_sorted_search(album, new, losslessOnly): + + NZB_PROVIDERS = (headphones.HEADPHONES_INDEXER or headphones.NEWZNAB or headphones.NZBSORG or headphones.NZBSRUS or headphones.OMGWTFNZBS) + NZB_DOWNLOADERS = (headphones.SAB_HOST or headphones.BLACKHOLE_DIR or headphones.NZBGET_HOST) + TORRENT_PROVIDERS = (headphones.KAT or headphones.PIRATEBAY or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER or headphones.WHATCD) + + results = [] + + if headphones.PREFER_TORRENTS == 0: + + if NZB_PROVIDERS and NZB_DOWNLOADERS: + results = searchNZB(album, new, losslessOnly) + + if not results and TORRENT_PROVIDERS: + results = searchTorrent(album, new, losslessOnly) + + elif headphones.PREFER_TORRENTS == 1: + + if TORRENT_PROVIDERS: + results = searchTorrent(album, new, losslessOnly) + + if not results and NZB_PROVIDERS and NZB_DOWNLOADERS: + results = searchNZB(album, new, losslessOnly) else: - foundNZB = "none" - - if (headphones.HEADPHONES_INDEXER or headphones.NEWZNAB or headphones.NZBSORG or headphones.NZBSRUS or headphones.OMGWTFNZBS) and (headphones.SAB_HOST or headphones.BLACKHOLE_DIR or headphones.NZBGET_HOST): - foundNZB = searchNZB(albumid, new, lossless) + if NZB_PROVIDERS and NZB_DOWNLOADERS: + nzb_results = searchNZB(album, new, losslessOnly) - if (headphones.KAT or headphones.PIRATEBAY or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER or headphones.WHATCD) and foundNZB == "none": - searchTorrent(albumid, new, lossless) + if TORRENT_PROVIDERS: + torrent_results = searchTorrent(album, new, losslessOnly) - logger.info('Search for Wanted albums complete') - -def searchNZB(albumid=None, new=False, losslessOnly=False): + results = nzb_results + torrent_results + + sorted_search_results = sort_search_results(results, album, new) + logger.info(u"Making sure we can download the best result") + (data, bestqual) = preprocess(sorted_search_results) + + if data and bestqual: + send_to_downloader(data, bestqual, album) + +def sort_search_results(resultlist, album, new): myDB = db.DBConnection() - if albumid: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, Type, SearchTerm from albums WHERE AlbumID=?', [albumid]) - else: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, Type, SearchTerm from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - new = True + # Add a priority if it has any of the preferred words + temp_list = [] + for result in resultlist: + if headphones.PREFERRED_WORDS and any(word.lower() in result[0].lower() for word in helpers.split_string(headphones.PREFERRED_WORDS)): + temp_list.append((result[0],result[1],result[2],result[3],result[4],1)) + else: + temp_list.append((result[0],result[1],result[2],result[3],result[4],0)) - for albums in results: + resultlist = temp_list - albumid = albums[2] - reldate = albums[3] + if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: + + logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE) + + tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [album['AlbumID']]) try: - year = reldate[:4] - except TypeError: - year = '' + albumlength = sum([pair[0] for pair in tracks]) - dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '.':'', ':':''} + targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128 - cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic)).strip() - cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic)).strip() + if not targetsize: + logger.info('No track information for %s - %s. Defaulting to highest quality' % (album['ArtistName'], album['AlbumTitle'])) + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) - # Use the provided search term if available, otherwise build a search term - if albums[5]: - term = albums[5] + else: + logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) + newlist = [] + flac_list = [] + if headphones.PREFERRED_BITRATE_HIGH_BUFFER: + high_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_HIGH_BUFFER)/100 + else: + high_size_limit = None + if headphones.PREFERRED_BITRATE_LOW_BUFFER: + low_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_LOW_BUFFER)/100 + else: + low_size_limit = None + + for result in resultlist: + + if high_size_limit and (int(result[1]) > high_size_limit): + + logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")") + + # Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches + if 'flac' in result[0].lower(): + flac_list.append((result[0], result[1], result[2], result[3], result[4], result[5])) + + continue + + if low_size_limit and (int(result[1]) < low_size_limit): + logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")") + continue + + delta = abs(targetsize - int(result[1])) + newlist.append((result[0], result[1], result[2], result[3], result[4], result[5], delta)) + + finallist = sorted(newlist, key=lambda title: (-title[5], title[6])) + + if not len(finallist) and len(flac_list) and headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS: + logger.info("Since there were no appropriate lossy matches (and at least one lossless match, going to use lossless instead") + finallist = sorted(flac_list, key=lambda title: (title[5], int(title[1])), reverse=True) + + except Exception, e: + + logger.debug('Error: %s' % str(e)) + logger.info('No track information for %s - %s. Defaulting to highest quality' % (album['ArtistName'], album['AlbumTitle'])) + + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) + + else: + + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) + + if new: + + while True: + + if len(finallist): + + alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [finallist[0][2]]) + + if len(alreadydownloaded): + logger.info('%s has already been downloaded. Skipping.' % finallist[0][0]) + finallist.pop(0) + + else: + break + else: + logger.info('No more results found for %s' % term) + return None + + if not len(finallist): + logger.info('No appropriate matches found for %s' % term) + return None + + return finallist + +def get_year_from_release_date(release_date): + + try: + year = release_date[:4] + except TypeError: + year = '' + + return year + +def searchNZB(album, new=False, losslessOnly=False): + + albumid = album['AlbumID'] + reldate = album['ReleaseDate'] + + year = get_year_from_release_date(reldate) + + dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '.':'', ':':''} + + cleanalbum = helpers.latinToAscii(helpers.replace_all(album['AlbumTitle'], dic)).strip() + cleanartist = helpers.latinToAscii(helpers.replace_all(album['ArtistName'], dic)).strip() + + # Use the provided search term if available, otherwise build a search term + if album['SearchTerm']: + term = album['SearchTerm'] + + else: + # FLAC usually doesn't have a year for some reason so I'll leave it out + # Various Artist albums might be listed as VA, so I'll leave that out too + # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums + if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4: + term = cleanartist + ' ' + cleanalbum + ' ' + year + elif album['ArtistName'] == 'Various Artists': + term = cleanalbum + ' ' + year else: - # FLAC usually doesn't have a year for some reason so I'll leave it out - # Various Artist albums might be listed as VA, so I'll leave that out too - # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums - if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4: - term = cleanartist + ' ' + cleanalbum + ' ' + year - elif albums[0] == 'Various Artists': - term = cleanalbum + ' ' + year - else: - term = cleanartist + ' ' + cleanalbum + term = cleanartist + ' ' + cleanalbum - # Replace bad characters in the term and unicode it - term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') + # Replace bad characters in the term and unicode it + term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') - artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8') + artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8') - logger.info("Searching for %s since it was marked as wanted" % term) + logger.info("Searching for %s since it was marked as wanted" % term) - resultlist = [] + resultlist = [] - if headphones.HEADPHONES_INDEXER: + if headphones.HEADPHONES_INDEXER: - provider = "headphones" - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "3040" - elif headphones.PREFERRED_QUALITY: - categories = "3040,3010" - else: - categories = "3010" + provider = "headphones" + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "3040" + elif headphones.PREFERRED_QUALITY: + categories = "3040,3010" + else: + categories = "3010" - if albums['Type'] == 'Other': - categories = "3030" - logger.info("Album type is audiobook/spokenword. Using audiobook category") + if album['Type'] == 'Other': + categories = "3030" + logger.info("Album type is audiobook/spokenword. Using audiobook category") - params = { "t": "search", - "cat": categories, - "apikey": '89edf227c1de9b3de50383fff11466c6', - "maxage": headphones.USENET_RETENTION, - "q": term - } + params = { "t": "search", + "cat": categories, + "apikey": '89edf227c1de9b3de50383fff11466c6', + "maxage": headphones.USENET_RETENTION, + "q": term + } - searchURL = 'http://headphones.codeshy.com/newznab/api?' + urllib.urlencode(params) + searchURL = 'http://headphones.codeshy.com/newznab/api?' + urllib.urlencode(params) - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - base64string = base64.encodestring('%s:%s' % (headphones.HPUSER, headphones.HPPASS)).replace('\n', '') - request.add_header("Authorization", "Basic %s" % base64string) - - opener = urllib2.build_opener() + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + base64string = base64.encodestring('%s:%s' % (headphones.HPUSER, headphones.HPPASS)).replace('\n', '') + request.add_header("Authorization", "Basic %s" % base64string) + + opener = urllib2.build_opener() - logger.info(u'Parsing results from %s' % (searchURL, 'Headphones Index')) + logger.info(u'Parsing results from %s' % (searchURL, 'Headphones Index')) - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from %s: %s' % ('Headphones Index', e)) - data = False + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from %s: %s' % ('Headphones Index', e)) + data = False - if data: + if data: - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from %s for %s" % ('Headphones Index', term)) - pass - - else: - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - if headphones.NEWZNAB: - - newznab_hosts = [] - - for newznab_host in headphones.EXTRA_NEWZNABS: - if newznab_host[2] == '1' or newznab_host[2] == 1: - newznab_hosts.append(newznab_host) - - provider = "newznab" - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "3040" - elif headphones.PREFERRED_QUALITY: - categories = "3040,3010" - else: - categories = "3010" - - if albums['Type'] == 'Other': - categories = "3030" - logger.info("Album type is audiobook/spokenword. Using audiobook category") - - for newznab_host in newznab_hosts: - - # Add a little mod for kere.ws - if newznab_host[0] == "http://kere.ws": - if categories == "3040": - categories = categories + ",4070" - elif categories == "3040,3010": - categories = categories + ",4070,4010" - elif categories == "3010": - categories = categories + ",4010" - else: - categories = categories + ",4050" - - params = { "t": "search", - "apikey": newznab_host[1], - "cat": categories, - "maxage": headphones.USENET_RETENTION, - "q": term - } - - searchURL = newznab_host[0] + '/api?' + urllib.urlencode(params) - - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - opener = urllib2.build_opener() - - logger.info(u'Parsing results from %s' % (searchURL, newznab_host[0])) - - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from %s: %s' % (newznab_host[0], e)) - data = False - - if data: - - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (newznab_host[0], term)) - pass - - else: - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - if headphones.NZBSORG: - provider = "nzbsorg" - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "3040" - elif headphones.PREFERRED_QUALITY: - categories = "3040,3010" - else: - categories = "3010" - - if albums['Type'] == 'Other': - categories = "3030" - logger.info("Album type is audiobook/spokenword. Using audiobook category") - - params = { "t": "search", - "apikey": headphones.NZBSORG_HASH, - "cat": categories, - "maxage": headphones.USENET_RETENTION, - "q": term - } - - searchURL = 'http://beta.nzbs.org/api?' + urllib.urlencode(params) - - logger.info(u'Parsing results from nzbs.org' % searchURL) - - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from nzbs.org: %s' % e) - data = False - - if data: - - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from nzbs.org for %s" % term) - pass - - else: - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - if headphones.NZBSRUS: - - provider = "nzbsrus" - categories = "54" - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - sub = "16" - elif headphones.PREFERRED_QUALITY: - sub = "" - else: - sub = "15" - - if albums['Type'] == 'Other': - sub = "" - logger.info("Album type is audiobook/spokenword. Searching all music categories") - - params = { "uid": headphones.NZBSRUS_UID, - "key": headphones.NZBSRUS_APIKEY, - "cat": categories, - "sub": sub, - "age": headphones.USENET_RETENTION, - "searchtext": term - } - - searchURL = 'https://www.nzbsrus.com/api.php?' + urllib.urlencode(params) - - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - opener = urllib2.build_opener() - - logger.info(u'Parsing results from NZBsRus' % searchURL) - - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from NZBsRus: %s' % e) - data = False - - if data: - - d = json.loads(data) - - if d['matches'] <= 0: - logger.info(u"No results found from NZBsRus for %s" % term) - pass - - else: - for item in d['results']: - try: - url = "http://www.nzbsrus.com/nzbdownload_rss.php/" + item['id'] + "/" + headphones.NZBSRUS_UID + "/" + item['key'] - title = item['name'] - size = int(item['size']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - - if headphones.OMGWTFNZBS: - - provider = "omgwtfnzbs" - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "22" - elif headphones.PREFERRED_QUALITY: - categories = "22,7" - else: - categories = "7" - - if albums['Type'] == 'Other': - categories = "29" - logger.info("Album type is audiobook/spokenword. Searching all music categories") - - params = { "user": headphones.OMGWTFNZBS_UID, - "api": headphones.OMGWTFNZBS_APIKEY, - "catid": categories, - "retention": headphones.USENET_RETENTION, - "search": term - } - - searchURL = 'http://api.omgwtfnzbs.org/json/?' + urllib.urlencode(params) - - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - opener = urllib2.build_opener() - - logger.info(u'Parsing results from omgwtfnzbs' % searchURL) - - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from omgwtfnzbs: %s' % e) - data = False - - if data: - - d = json.loads(data) - - if 'notice' in data: - logger.info(u"No results returned from omgwtfnzbs: %s" % d['notice']) - pass - - else: - for item in d: - try: - url = item['getnzb'] - title = item['release'] - size = int(item['sizebytes']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the results: %s" % e) - - # attempt to verify that this isn't a substring result - # when looking for "Foo - Foo" we don't want "Foobar" - # this should be less of an issue when it isn't a self-titled album so we'll only check vs artist - # - # Also will filter flac & remix albums if not specifically looking for it - # This code also checks the ignored words and required words - - if len(resultlist): - resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] - - if len(resultlist): - - # Add a priority if it has any of the preferred words - temp_list = [] - for result in resultlist: - if headphones.PREFERRED_WORDS and any(word.lower() in result[0].lower() for word in helpers.split_string(headphones.PREFERRED_WORDS)): - temp_list.append((result[0],result[1],result[2],result[3],1)) - else: - temp_list.append((result[0],result[1],result[2],result[3],0)) - - resultlist = temp_list - - if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: - - logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE) - - tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid]) - - try: - albumlength = sum([pair[0] for pair in tracks]) - - targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128 - - if not targetsize: - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - nzblist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - else: - logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) - newlist = [] - flac_list = [] - - if headphones.PREFERRED_BITRATE_HIGH_BUFFER: - high_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_HIGH_BUFFER)/100 - else: - high_size_limit = None - if headphones.PREFERRED_BITRATE_LOW_BUFFER: - low_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_LOW_BUFFER)/100 - else: - low_size_limit = None - - for result in resultlist: - - if high_size_limit and (int(result[1]) > high_size_limit): - - logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")") - - # Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches - if 'flac' in result[0].lower(): - flac_list.append((result[0], result[1], result[2], result[3], result[4])) - - continue - - if low_size_limit and (int(result[1]) < low_size_limit): - logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")") - continue - - delta = abs(targetsize - int(result[1])) - newlist.append((result[0], result[1], result[2], result[3], result[4], delta)) - - nzblist = sorted(newlist, key=lambda title: (-title[4], title[5])) - - if not len(nzblist) and len(flac_list) and headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS: - logger.info("Since there were no appropriate lossy matches (and at least one lossless match), going to use lossless instead") - nzblist = sorted(flac_list, key=lambda title: (title[4], int(title[1])), reverse=True) - - except Exception, e: - - logger.debug('Error: %s' % str(e)) - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - - nzblist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) + d = feedparser.parse(data) + if not len(d.entries): + logger.info(u"No results found from %s for %s" % ('Headphones Index', term)) + pass else: - - nzblist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - - - if new: - - while True: - - if len(nzblist): - - alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [nzblist[0][2]]) - - if len(alreadydownloaded): - logger.info('%s has already been downloaded. Skipping.' % nzblist[0][0]) - nzblist.pop(0) - - else: - break - else: - logger.info('No more results found for %s' % term) - return "none" - - if not len(nzblist): - logger.info('No appropriate matches found for %s' % term) - return "none" - - logger.info(u"Pre-processing result") - - (data, bestqual) = preprocess(nzblist) - - if data and bestqual: - logger.info(u'Found best result: %s - %s' % (bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1]))) - # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads - nzb_folder_name = helpers.sab_sanitize_foldername(bestqual[0]) - if headphones.NZB_DOWNLOADER == 1: - - nzb = classes.NZBDataSearchResult() - nzb.extraInfo.append(data) - nzb.name = nzb_folder_name - nzbget.sendNZB(nzb) - - elif headphones.NZB_DOWNLOADER == 0: - - nzb = classes.NZBDataSearchResult() - nzb.extraInfo.append(data) - nzb.name = nzb_folder_name - sab.sendNZB(nzb) - - # If we sent the file to sab, we can check how it was renamed and insert that into the snatched table - (replace_spaces, replace_dots) = sab.checkConfig() - - if replace_dots: - nzb_folder_name = helpers.sab_replace_dots(nzb_folder_name) - if replace_spaces: - nzb_folder_name = helpers.sab_replace_spaces(nzb_folder_name) - - else: - - nzb_name = nzb_folder_name + '.nzb' - download_path = os.path.join(headphones.BLACKHOLE_DIR, nzb_name) + for item in d.entries: try: - prev = os.umask(headphones.UMASK) - f = open(download_path, 'w') - f.write(data) - f.close() - os.umask(prev) - logger.info('File saved to: %s' % nzb_name) + url = item.link + title = item.title + size = int(item.links[1]['length']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + except Exception, e: - logger.error('Couldn\'t write NZB file: %s' % e) - break + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + + if headphones.NEWZNAB: - myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]]) - myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name, "nzb"]) - return "found" - else: - return "none" + newznab_hosts = [] + + for newznab_host in headphones.EXTRA_NEWZNABS: + if newznab_host[2] == '1' or newznab_host[2] == 1: + newznab_hosts.append(newznab_host) + + provider = "newznab" + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "3040" + elif headphones.PREFERRED_QUALITY: + categories = "3040,3010" else: - return "none" + categories = "3010" + + if album['Type'] == 'Other': + categories = "3030" + logger.info("Album type is audiobook/spokenword. Using audiobook category") + + for newznab_host in newznab_hosts: + + # Add a little mod for kere.ws + if newznab_host[0] == "http://kere.ws": + if categories == "3040": + categories = categories + ",4070" + elif categories == "3040,3010": + categories = categories + ",4070,4010" + elif categories == "3010": + categories = categories + ",4010" + else: + categories = categories + ",4050" + + params = { "t": "search", + "apikey": newznab_host[1], + "cat": categories, + "maxage": headphones.USENET_RETENTION, + "q": term + } + + searchURL = newznab_host[0] + '/api?' + urllib.urlencode(params) + + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + opener = urllib2.build_opener() + + logger.info(u'Parsing results from %s' % (searchURL, newznab_host[0])) + + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from %s: %s' % (newznab_host[0], e)) + data = False + + if data: + + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (newznab_host[0], term)) + pass + + else: + for item in d.entries: + try: + url = item.link + title = item.title + size = int(item.links[1]['length']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + + if headphones.NZBSORG: + provider = "nzbsorg" + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "3040" + elif headphones.PREFERRED_QUALITY: + categories = "3040,3010" + else: + categories = "3010" + + if album['Type'] == 'Other': + categories = "3030" + logger.info("Album type is audiobook/spokenword. Using audiobook category") + + params = { "t": "search", + "apikey": headphones.NZBSORG_HASH, + "cat": categories, + "maxage": headphones.USENET_RETENTION, + "q": term + } + + searchURL = 'http://beta.nzbs.org/api?' + urllib.urlencode(params) + + logger.info(u'Parsing results from nzbs.org' % searchURL) + + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from nzbs.org: %s' % e) + data = False + + if data: + + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from nzbs.org for %s" % term) + pass + + else: + for item in d.entries: + try: + url = item.link + title = item.title + size = int(item.links[1]['length']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + + if headphones.NZBSRUS: + + provider = "nzbsrus" + categories = "54" + + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + sub = "16" + elif headphones.PREFERRED_QUALITY: + sub = "" + else: + sub = "15" + + if album['Type'] == 'Other': + sub = "" + logger.info("Album type is audiobook/spokenword. Searching all music categories") + + params = { "uid": headphones.NZBSRUS_UID, + "key": headphones.NZBSRUS_APIKEY, + "cat": categories, + "sub": sub, + "age": headphones.USENET_RETENTION, + "searchtext": term + } + + searchURL = 'https://www.nzbsrus.com/api.php?' + urllib.urlencode(params) + + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + opener = urllib2.build_opener() + + logger.info(u'Parsing results from NZBsRus' % searchURL) + + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from NZBsRus: %s' % e) + data = False + + if data: + + d = json.loads(data) + + if d['matches'] <= 0: + logger.info(u"No results found from NZBsRus for %s" % term) + pass + + else: + for item in d['results']: + try: + url = "http://www.nzbsrus.com/nzbdownload_rss.php/" + item['id'] + "/" + headphones.NZBSRUS_UID + "/" + item['key'] + title = item['name'] + size = int(item['size']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + if headphones.OMGWTFNZBS: + + provider = "omgwtfnzbs" + + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "22" + elif headphones.PREFERRED_QUALITY: + categories = "22,7" + else: + categories = "7" + + if album['Type'] == 'Other': + categories = "29" + logger.info("Album type is audiobook/spokenword. Searching all music categories") + + params = { "user": headphones.OMGWTFNZBS_UID, + "api": headphones.OMGWTFNZBS_APIKEY, + "catid": categories, + "retention": headphones.USENET_RETENTION, + "search": term + } + + searchURL = 'http://api.omgwtfnzbs.org/json/?' + urllib.urlencode(params) + + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + opener = urllib2.build_opener() + + logger.info(u'Parsing results from omgwtfnzbs' % searchURL) + + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from omgwtfnzbs: %s' % e) + data = False + + if data: + + d = json.loads(data) + + if 'notice' in data: + logger.info(u"No results returned from omgwtfnzbs: %s" % d['notice']) + pass + + else: + for item in d: + try: + url = item['getnzb'] + title = item['release'] + size = int(item['sizebytes']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the results: %s" % e) + + # attempt to verify that this isn't a substring result + # when looking for "Foo - Foo" we don't want "Foobar" + # this should be less of an issue when it isn't a self-titled album so we'll only check vs artist + # + # Also will filter flac & remix albums if not specifically looking for it + # This code also checks the ignored words and required words + + if len(resultlist): + resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] + + if len(resultlist): + return resultlist + + +def send_to_downloader(data, bestqual, album): + + logger.info(u'Found best result from %s: %s - %s' % (bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1]))) + # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads + kind = bestqual[4] + + if kind == 'nzb': + folder_name = helpers.sab_sanitize_foldername(bestqual[0]) + + if headphones.NZB_DOWNLOADER == 1: + + nzb = classes.NZBDataSearchResult() + nzb.extraInfo.append(data) + nzb.name = nzb_folder_name + nzbget.sendNZB(nzb) + + elif headphones.NZB_DOWNLOADER == 0: + + nzb = classes.NZBDataSearchResult() + nzb.extraInfo.append(data) + nzb.name = nzb_folder_name + sab.sendNZB(nzb) + + # If we sent the file to sab, we can check how it was renamed and insert that into the snatched table + (replace_spaces, replace_dots) = sab.checkConfig() + + if replace_dots: + nzb_folder_name = helpers.sab_replace_dots(nzb_folder_name) + if replace_spaces: + nzb_folder_name = helpers.sab_replace_spaces(nzb_folder_name) + + else: + + nzb_name = nzb_folder_name + '.nzb' + download_path = os.path.join(headphones.BLACKHOLE_DIR, nzb_name) + try: + prev = os.umask(headphones.UMASK) + f = open(download_path, 'w') + f.write(data) + f.close() + os.umask(prev) + logger.info('File saved to: %s' % nzb_name) + except Exception, e: + logger.error('Couldn\'t write NZB file: %s' % e) + return + else: + folder_name = '%s - %s [%s]' % (helpers.latinToAscii(album['ArtistName']).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(album['AlbumTitle']).encode('UTF-8').replace('/', '_'), get_year_from_release_date(album['ReleaseDate'])) + + # Blackhole + if headphones.TORRENT_DOWNLOADER == 0: + + if bestqual[2].startswith("magnet:"): + logger.error("Cannot save magnet files to blackhole. Please switch your torrent downloader to Transmission or uTorrent") + return + + # Get torrent name from .torrent, this is usually used by the torrent client as the folder name + + torrent_name = folder_name + '.torrent' + download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name) + try: + if bestqual[3] == 'rutracker.org': + download_path = rutracker.get_torrent(bestqual[2], headphones.TORRENTBLACKHOLE_DIR) + if not download_path: + return + else: + #Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name. + prev = os.umask(headphones.UMASK) + torrent_file = open(download_path, 'wb') + torrent_file.write(data) + torrent_file.close() + os.umask(prev) + + #Open the fresh torrent file again so we can extract the proper torrent name + #Used later in post-processing. + torrent_file = open(download_path, 'rb') + torrent_info = bencode.bdecode(torrent_file.read()) + torrent_file.close() + torrent_folder_name = torrent_info['info'].get('name','').decode('utf-8') + logger.info('Torrent folder name: %s' % torrent_folder_name) + except Exception, e: + logger.error('Couldn\'t get name from Torrent file: %s' % e) + return + + elif headphones.TORRENT_DOWNLOADER == 1: + logger.info("Sending torrent to Transmission") + + # rutracker needs cookies to be set, pass the .torrent file instead of url + if bestqual[3] == 'rutracker.org': + file_or_url = rutracker.get_torrent(bestqual[2]) + else: + file_or_url = bestqual[2] + + torrentid = transmission.addTorrent(file_or_url) + + if not torrentid: + logger.error("Error sending torrent to Transmission. Are you sure it's running?") + return + + folder_name = transmission.getTorrentFolder(torrentid) + if folder_name: + logger.info('Torrent folder name: %s' % folder_name) + else: + logger.error('Torrent folder name could not be determined') + return + + # remove temp .torrent file created above + if bestqual[3] == 'rutracker.org': + try: + shutil.rmtree(os.path.split(file_or_url)[0]) + except Exception, e: + logger.warning('Couldn\'t remove temp dir %s' % e) + + myDB = db.DBConnection() + myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [album['AlbumID']]) + myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Snatched", folder_name, kind]) def verifyresult(title, artistterm, term, lossless): @@ -768,811 +861,633 @@ def getresultNZB(result): logger.warn('Error fetching nzb from url: ' + result[2] + ' %s' % e) return nzb -def preprocess(resultlist): +def searchTorrent(album, new=False, losslessOnly=False): - if not headphones.USENET_RETENTION: - usenet_retention = 2000 - else: - usenet_retention = int(headphones.USENET_RETENTION) - - for result in resultlist: - nzb = getresultNZB(result) - if nzb: - try: - d = minidom.parseString(nzb) - node = d.documentElement - nzbfiles = d.getElementsByTagName("file") - skipping = False - for nzbfile in nzbfiles: - if int(nzbfile.getAttribute("date")) < (time.time() - usenet_retention * 86400): - logger.info('NZB contains a file out of your retention. Skipping.') - skipping = True - break - if skipping: - continue - - #TODO: Do we want rar checking in here to try to keep unknowns out? - #or at least the option to do so? - except Exception, e: - logger.error('Unable to parse the best result NZB. Error: ' + str(e) + '. (Make sure your username/password/API is correct for provider: ' + result[3]) - continue - return nzb, result - else: - logger.error("Couldn't retrieve the best nzb. Skipping.") - return (False, False) - - - -def searchTorrent(albumid=None, new=False, losslessOnly=False): global gazelle # persistent what.cd api object to reduce number of login attempts - myDB = db.DBConnection() - - if albumid: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, SearchTerm from albums WHERE AlbumID=?', [albumid]) - else: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, SearchTerm from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - new = True - # rutracker login - if headphones.RUTRACKER and results: + if headphones.RUTRACKER and album: rulogin = rutracker.login(headphones.RUTRACKER_USER, headphones.RUTRACKER_PASSWORD) if not rulogin: logger.info(u'Could not login to rutracker, search results will exclude this provider') - for albums in results: + albumid = album['AlbumID'] + reldate = album['ReleaseDate'] - albumid = albums[2] - reldate = albums[3] + year = get_year_from_release_date(reldate) + + # MERGE THIS WITH THE TERM CLEANUP FROM searchNZB + dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':' ', '*':''} + + semi_cleanalbum = helpers.replace_all(album['AlbumTitle'], dic) + cleanalbum = helpers.latinToAscii(semi_cleanalbum) + semi_cleanartist = helpers.replace_all(album['ArtistName'], dic) + cleanartist = helpers.latinToAscii(semi_cleanartist) + + # Use provided term if available, otherwise build our own (this code needs to be cleaned up since a lot + # of these torrent providers are just using cleanartist/cleanalbum terms + if album['SearchTerm']: + term = album['SearchTerm'] + + else: + # FLAC usually doesn't have a year for some reason so I'll leave it out + # Various Artist albums might be listed as VA, so I'll leave that out too + # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums + if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4: + term = cleanartist + ' ' + cleanalbum + ' ' + year + elif album['ArtistName'] == 'Various Artists': + term = cleanalbum + ' ' + year + else: + term = cleanartist + ' ' + cleanalbum + + # Save user search term + if album['SearchTerm']: + usersearchterm = term + else: + usersearchterm = '' + + semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist).encode('utf-8', 'replace') + semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum).encode('utf-8', 'replace') + # Replace bad characters in the term and unicode it + term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') + artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8', 'replace') + albumterm = re.sub('[\.\-\/]', ' ', cleanalbum).encode('utf-8', 'replace') + + logger.info("Searching torrents for %s since it was marked as wanted" % term) + + resultlist = [] + pre_sorted_results = False + minimumseeders = int(headphones.NUMBEROFSEEDERS) - 1 + + if headphones.KAT: + provider = "Kick Ass Torrent" + providerurl = url_fix("http://kickass.to/usearch/" + term) + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "7" #music + format = "2" #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + categories = "7" #music + format = "10" #mp3+flac + maxsize = 10000000000 + else: + categories = "7" #music + format = "8" #mp3 + maxsize = 300000000 + + params = { + "categories[0]": "music", + "field": "seeders", + "sorder": "desc", + "rss": "1" + } + searchURL = providerurl + "/?%s" % urllib.urlencode(params) try: - year = reldate[:4] - except TypeError: - year = '' + data = urllib2.urlopen(searchURL, timeout=20) + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False - dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':' ', '*':''} + if data: - semi_cleanalbum = helpers.replace_all(albums[1], dic) - cleanalbum = helpers.latinToAscii(semi_cleanalbum) - semi_cleanartist = helpers.replace_all(albums[0], dic) - cleanartist = helpers.latinToAscii(semi_cleanartist) + logger.info(u'Parsing results from KAT' % searchURL) - # Use provided term if available, otherwise build our own (this code needs to be cleaned up since a lot - # of these torrent providers are just using cleanartist/cleanalbum terms - if albums[4]: - term = albums[4] + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass - else: - # FLAC usually doesn't have a year for some reason so I'll leave it out - # Various Artist albums might be listed as VA, so I'll leave that out too - # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums - if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4: - term = cleanartist + ' ' + cleanalbum + ' ' + year - elif albums[0] == 'Various Artists': - term = cleanalbum + ' ' + year else: - term = cleanartist + ' ' + cleanalbum - - # Save user search term - if albums[4]: - usersearchterm = term - else: - usersearchterm = '' - - semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist).encode('utf-8', 'replace') - semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum).encode('utf-8', 'replace') - # Replace bad characters in the term and unicode it - term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') - artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8', 'replace') - albumterm = re.sub('[\.\-\/]', ' ', cleanalbum).encode('utf-8', 'replace') - - logger.info("Searching torrents for %s since it was marked as wanted" % term) - - resultlist = [] - pre_sorted_results = False - minimumseeders = int(headphones.NUMBEROFSEEDERS) - 1 - - if headphones.KAT: - provider = "Kick Ass Torrent" - providerurl = url_fix("http://kickass.to/usearch/" + term) - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "7" #music - format = "2" #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - categories = "7" #music - format = "10" #mp3+flac - maxsize = 10000000000 - else: - categories = "7" #music - format = "8" #mp3 - maxsize = 300000000 - - params = { - "categories[0]": "music", - "field": "seeders", - "sorder": "desc", - "rss": "1" - } - searchURL = providerurl + "/?%s" % urllib.urlencode(params) - - try: - data = urllib2.urlopen(searchURL, timeout=20) - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False - - if data: - - logger.info(u'Parsing results from KAT' % searchURL) - - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in d.entries: + for item in d.entries: + try: + rightformat = True + title = item['title'] + seeders = item['torrent_seeds'] + url = item['links'][1]['href'] + size = int(item['links'][1]['length']) try: - rightformat = True - title = item['title'] - seeders = item['torrent_seeds'] - url = item['links'][1]['href'] - size = int(item['links'][1]['length']) - try: - if format == "2": - request = urllib2.Request(url) - request.add_header('Accept-encoding', 'gzip') - request.add_header('Referer', 'http://kat.ph/') - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO( response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() - else: - torrent = response.read() - if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: - rightformat = False - except Exception, e: - rightformat = False - if rightformat == True and size < maxsize and minimumseeders < int(seeders): - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeders), rightformat)) - + if format == "2": + request = urllib2.Request(url) + request.add_header('Accept-encoding', 'gzip') + request.add_header('Referer', 'http://kat.ph/') + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO( response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: + rightformat = False except Exception, e: - logger.error(u"An unknown error occurred in the KAT parser: %s" % e) + rightformat = False + if rightformat == True and size < maxsize and minimumseeders < int(seeders): + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeders), rightformat)) - if headphones.WAFFLES: - provider = "Waffles.fm" - providerurl = url_fix("https://www.waffles.fm/browse.php") + except Exception, e: + logger.error(u"An unknown error occurred in the KAT parser: %s" % e) + + if headphones.WAFFLES: + provider = "Waffles.fm" + providerurl = url_fix("https://www.waffles.fm/browse.php") + + bitrate = None + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + format = "FLAC" + bitrate = "(Lossless)" + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + format = "FLAC OR MP3" + maxsize = 10000000000 + else: + format = "MP3" + maxsize = 300000000 + + if not usersearchterm: + query_items = ['artist:"%s"' % artistterm, + 'album:"%s"' % albumterm, + 'year:(%s)' % year] + else: + query_items = [usersearchterm] + + query_items.extend(['format:(%s)' % format, + 'size:[0 TO %d]' % maxsize, + '-seeders:0']) # cut out dead torrents + + if bitrate: + query_items.append('bitrate:"%s"' % bitrate) + + params = { + "uid": headphones.WAFFLES_UID, + "passkey": headphones.WAFFLES_PASSKEY, + "rss": "1", + "c0": "1", + "s": "seeders", # sort by + "d": "desc" # direction + } + + searchURL = "%s?%s&q=%s" % (providerurl, urllib.urlencode(params), urllib.quote(" ".join(query_items))) + + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False + + if data: + + logger.info(u'Parsing results from Waffles.fm' % searchURL) + + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass + + else: + for item in d.entries: + + try: + title = item.title + desc_match = re.search(r"Size: (\d+)<", item.description) + size = int(desc_match.group(1)) + url = item.link + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + except Exception, e: + logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s" % e) + + # rutracker.org + + if headphones.RUTRACKER and rulogin: + + provider = "rutracker.org" + + # Ignore if release date not specified, results too unpredictable + + if not year and not usersearchterm: + logger.info(u'Release date not specified, ignoring for rutracker.org') + else: + + bitrate = False - bitrate = None if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - format = "FLAC" - bitrate = "(Lossless)" + format = 'lossless' maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - format = "FLAC OR MP3" + elif headphones.PREFERRED_QUALITY == 1: + format = 'lossless+mp3' maxsize = 10000000000 else: - format = "MP3" + format = 'mp3' maxsize = 300000000 + if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: + bitrate = True + + # build search url based on above if not usersearchterm: - query_items = ['artist:"%s"' % artistterm, - 'album:"%s"' % albumterm, - 'year:(%s)' % year] + searchURL = rutracker.searchurl(artistterm, albumterm, year, format) else: - query_items = [usersearchterm] + searchURL = rutracker.searchurl(usersearchterm, ' ', ' ', format) - query_items.extend(['format:(%s)' % format, - 'size:[0 TO %d]' % maxsize, - '-seeders:0']) # cut out dead torrents + logger.info(u'Parsing results from rutracker.org' % searchURL) + # parse results and get best match + + rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid, bitrate) + + # add best match to overall results list + + if rulist: + for ru in rulist: + title = ru[0].decode('utf-8') + size = ru[1] + url = ru[2] + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info(u"No valid results found from %s" % (provider)) + + if headphones.WHATCD: + provider = "What.cd" + providerurl = "http://what.cd/" + + bitrate = None + bitrate_string = bitrate + + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: # Lossless Only mode + search_formats = [gazelleformat.FLAC] + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY == 2: # Preferred quality mode + search_formats = [None] # should return all + bitrate = headphones.PREFERRED_BITRATE if bitrate: - query_items.append('bitrate:"%s"' % bitrate) - - params = { - "uid": headphones.WAFFLES_UID, - "passkey": headphones.WAFFLES_PASSKEY, - "rss": "1", - "c0": "1", - "s": "seeders", # sort by - "d": "desc" # direction - } - - searchURL = "%s?%s&q=%s" % (providerurl, urllib.urlencode(params), urllib.quote(" ".join(query_items))) + for encoding_string in gazelleencoding.ALL_ENCODINGS: + if re.search(bitrate, encoding_string, flags=re.I): + bitrate_string = encoding_string + if bitrate_string not in gazelleencoding.ALL_ENCODINGS: + raise Exception("Preferred bitrate %s not recognized by %s" % (bitrate_string, provider)) + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY == 1: # Highest quality including lossless + search_formats = [gazelleformat.FLAC, gazelleformat.MP3] + maxsize = 10000000000 + else: # Highest quality excluding lossless + search_formats = [gazelleformat.MP3] + maxsize = 300000000 + if not gazelle or not gazelle.logged_in(): try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False + logger.info(u"Attempting to log in to What.cd...") + gazelle = gazelleapi.GazelleAPI(headphones.WHATCD_USERNAME, headphones.WHATCD_PASSWORD) + gazelle._login() + except Exception, e: + gazelle = None + logger.error(u"What.cd credentials incorrect or site is down. Error: %s %s" % (e.__class__.__name__, str(e))) - if data: + if gazelle and gazelle.logged_in(): + logger.info(u"Searching %s..." % provider) + all_torrents = [] + for search_format in search_formats: + all_torrents.extend(gazelle.search_torrents(artistname=semi_clean_artist_term, + groupname=semi_clean_album_term, + format=search_format, encoding=bitrate_string)['results']) - logger.info(u'Parsing results from Waffles.fm' % searchURL) + # filter on format, size, and num seeders + logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...") + match_torrents = [ torrent for torrent in all_torrents if torrent.size <= maxsize ] + match_torrents = [ torrent for torrent in match_torrents if torrent.seeders >= minimumseeders ] - d = feedparser.parse(data) + logger.info(u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass + # sort by times d/l'd + if not len(match_torrents): + logger.info(u"No results found from %s for %s after filtering" % (provider, term)) + elif len(match_torrents) > 1: + logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % + (len(match_torrents), provider, artistterm, albumterm)) + logger.info("Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string) + match_torrents.sort(key=lambda x: int(x.snatched), reverse=True) + if gazelleformat.MP3 in search_formats: + # sort by size after rounding to nearest 10MB...hacky, but will favor highest quality + match_torrents.sort(key=lambda x: int(10 * round(x.size/1024./1024./10.)), reverse=True) + if search_formats and None not in search_formats: + match_torrents.sort(key=lambda x: int(search_formats.index(x.format))) # prefer lossless +# if bitrate: +# match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) +# match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) + logger.info(u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - else: - for item in d.entries: + pre_sorted_results = True + for torrent in match_torrents: + if not torrent.file_path: + torrent.group.update_group_data() # will load the file_path for the individual torrents + resultlist.append((torrent.file_path, + torrent.size, + gazelle.generate_torrent_link(torrent.id), + provider, + 'torrent')) - try: - title = item.title - desc_match = re.search(r"Size: (\d+)<", item.description) - size = int(desc_match.group(1)) - url = item.link - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - except Exception, e: - logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s" % e) - - # rutracker.org - - if headphones.RUTRACKER and rulogin: - - provider = "rutracker.org" - - # Ignore if release date not specified, results too unpredictable - - if not year and not usersearchterm: - logger.info(u'Release date not specified, ignoring for rutracker.org') - else: - - bitrate = False - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - format = 'lossless' - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY == 1: - format = 'lossless+mp3' - maxsize = 10000000000 - else: - format = 'mp3' - maxsize = 300000000 - if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: - bitrate = True - - # build search url based on above - - if not usersearchterm: - searchURL = rutracker.searchurl(artistterm, albumterm, year, format) - else: - searchURL = rutracker.searchurl(usersearchterm, ' ', ' ', format) - - logger.info(u'Parsing results from rutracker.org' % searchURL) - - # parse results and get best match - - rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid, bitrate) - - # add best match to overall results list - - if rulist: - for ru in rulist: - title = ru[0].decode('utf-8') - size = ru[1] - url = ru[2] - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info(u"No valid results found from %s" % (provider)) - - if headphones.WHATCD: - provider = "What.cd" - providerurl = "http://what.cd/" - - bitrate = None - bitrate_string = bitrate - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: # Lossless Only mode - search_formats = [gazelleformat.FLAC] - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY == 2: # Preferred quality mode - search_formats = [None] # should return all - bitrate = headphones.PREFERRED_BITRATE - if bitrate: - for encoding_string in gazelleencoding.ALL_ENCODINGS: - if re.search(bitrate, encoding_string, flags=re.I): - bitrate_string = encoding_string - if bitrate_string not in gazelleencoding.ALL_ENCODINGS: - raise Exception("Preferred bitrate %s not recognized by %s" % (bitrate_string, provider)) - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY == 1: # Highest quality including lossless - search_formats = [gazelleformat.FLAC, gazelleformat.MP3] - maxsize = 10000000000 - else: # Highest quality excluding lossless - search_formats = [gazelleformat.MP3] - maxsize = 300000000 - - if not gazelle or not gazelle.logged_in(): - try: - logger.info(u"Attempting to log in to What.cd...") - gazelle = gazelleapi.GazelleAPI(headphones.WHATCD_USERNAME, headphones.WHATCD_PASSWORD) - gazelle._login() - except Exception, e: - gazelle = None - logger.error(u"What.cd credentials incorrect or site is down. Error: %s %s" % (e.__class__.__name__, str(e))) - - if gazelle and gazelle.logged_in(): - logger.info(u"Searching %s..." % provider) - all_torrents = [] - for search_format in search_formats: - all_torrents.extend(gazelle.search_torrents(artistname=semi_clean_artist_term, - groupname=semi_clean_album_term, - format=search_format, encoding=bitrate_string)['results']) - - # filter on format, size, and num seeders - logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...") - match_torrents = [ torrent for torrent in all_torrents if torrent.size <= maxsize ] - match_torrents = [ torrent for torrent in match_torrents if torrent.seeders >= minimumseeders ] - - logger.info(u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - - # sort by times d/l'd - if not len(match_torrents): - logger.info(u"No results found from %s for %s after filtering" % (provider, term)) - elif len(match_torrents) > 1: - logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % - (len(match_torrents), provider, artistterm, albumterm)) - logger.info("Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string) - match_torrents.sort(key=lambda x: int(x.snatched), reverse=True) - if gazelleformat.MP3 in search_formats: - # sort by size after rounding to nearest 10MB...hacky, but will favor highest quality - match_torrents.sort(key=lambda x: int(10 * round(x.size/1024./1024./10.)), reverse=True) - if search_formats and None not in search_formats: - match_torrents.sort(key=lambda x: int(search_formats.index(x.format))) # prefer lossless - # if bitrate: - # match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) - # match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) - logger.info(u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - - pre_sorted_results = True - for torrent in match_torrents: - if not torrent.file_path: - torrent.group.update_group_data() # will load the file_path for the individual torrents - resultlist.append((torrent.file_path, - torrent.size, - gazelle.generate_torrent_link(torrent.id), - provider)) - - # Pirate Bay - if headphones.PIRATEBAY: - provider = "The Pirate Bay" - if headphones.PIRATEBAY_PROXY_URL: - #Might need to clean up the user submitted url - pirate_proxy = headphones.PIRATEBAY_PROXY_URL - - if not pirate_proxy.startswith('http'): - pirate_proxy = 'http://' + pirate_proxy - if pirate_proxy.endswith('/'): - pirate_proxy = pirate_proxy[:-1] - - providerurl = url_fix(pirate_proxy + "/search/" + term + "/0/99/") - - else: - providerurl = url_fix("http://thepiratebay.se/search/" + term + "/0/99/") - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - category = '104' #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - category = '100' #audio cat - maxsize = 10000000000 - else: - category = '101' #mp3 - maxsize = 300000000 - - searchURL = providerurl + category + # Pirate Bay + if headphones.PIRATEBAY: + provider = "The Pirate Bay" + if headphones.PIRATEBAY_PROXY_URL: + #Might need to clean up the user submitted url + pirate_proxy = headphones.PIRATEBAY_PROXY_URL - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from The Pirate Bay: %s' % e) - data = False + if not pirate_proxy.startswith('http'): + pirate_proxy = 'http://' + pirate_proxy + if pirate_proxy.endswith('/'): + pirate_proxy = pirate_proxy[:-1] + + providerurl = url_fix(pirate_proxy + "/search/" + term + "/0/99/") - if data: + else: + providerurl = url_fix("http://thepiratebay.se/search/" + term + "/0/99/") - logger.info(u'Parsing results from The Pirate Bay' % searchURL) - - soup = BeautifulSoup(data) - table = soup.find('table') - rows = None - if table: - rows = table.findAll('tr') - - if not rows or len(rows) == '1': - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in rows[1:]: - try: - rightformat = True - title = ''.join(item.find("a", {"class" : "detLink"})) - seeds = int(''.join(item.find("td", {"align" : "right"}))) - url = item.findAll("a")[3]['href'] - if headphones.TORRENT_DOWNLOADER == 0: - tor_hash = re.findall("urn:btih:(.*?)&", url) - if len(tor_hash) > 0: - url = "http://torrage.com/torrent/"+str(tor_hash[0]).upper()+".torrent" - else: - url = None - formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace(u'\xa0', ' ') - size = helpers.piratesize(formatted_size) - if size < maxsize and minimumseeders < seeds and url != None: - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, formatted_size)) - else: - logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds))) - - except Exception, e: - logger.error(u"An unknown error occurred in the Pirate Bay parser: %s" % e) + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + category = '104' #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + category = '100' #audio cat + maxsize = 10000000000 + else: + category = '101' #mp3 + maxsize = 300000000 - if headphones.ISOHUNT: - provider = "isoHunt" - providerurl = url_fix("http://isohunt.com/js/rss/" + term) - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "7" #music - format = "2" #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - categories = "7" #music - format = "10" #mp3+flac - maxsize = 10000000000 - else: - categories = "7" #music - format = "8" #mp3 - maxsize = 300000000 - - params = { - "iht": "2", - "sort": "seeds" - } - searchURL = providerurl + "?%s" % urllib.urlencode(params) - - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False - - if data: - - logger.info(u'Parsing results from isoHunt' % searchURL) - - d = feedparser.parse(data) - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in d.entries: - try: - rightformat = True - title = re.sub(r"(?<= \[)(.+)(?=\])","",item.title) - title = title.replace("[]","") - sxstart = item.description.find("Seeds: ") + 7 - seeds = "" - while item.description[sxstart:sxstart + 1] != " ": - seeds = seeds + item.description[sxstart:sxstart + 1] - sxstart = sxstart + 1 - url = item.links[1]['url'] - size = int(item.links[1]['length']) - try: - if format == "2": - request = urllib2.Request(url) - request.add_header('Accept-encoding', 'gzip') - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO( response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() - else: - torrent = response.read() - if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: - rightformat = False - except Exception, e: - rightformat = False - for findterm in term.split(" "): - if not findterm in title: - rightformat = False - if rightformat == True and size < maxsize and minimumseeders < seeds: - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) - - except Exception, e: - logger.error(u"An unknown error occurred in the isoHunt parser: %s" % e) - - if headphones.MININOVA: - provider = "Mininova" - providerurl = url_fix("http://www.mininova.org/rss/" + term + "/5") - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "7" #music - format = "2" #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - categories = "7" #music - format = "10" #mp3+flac - maxsize = 10000000000 - else: - categories = "7" #music - format = "8" #mp3 - maxsize = 300000000 - - searchURL = providerurl - - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False - - if data: - - logger.info(u'Parsing results from Mininova' % searchURL) - - d = feedparser.parse(data) - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in d.entries: - try: - rightformat = True - title = item.title - sxstart = item.description.find("Ratio: ") + 7 - seeds = "" - while item.description[sxstart:sxstart + 1] != " ": - seeds = seeds + item.description[sxstart:sxstart + 1] - sxstart = sxstart + 1 - url = item.links[1]['url'] - size = int(item.links[1]['length']) - try: - if format == "2": - request = urllib2.Request(url) - request.add_header('Accept-encoding', 'gzip') - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO( response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() - else: - torrent = response.read() - if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: - rightformat = False - except Exception, e: - rightformat = False - if rightformat == True and size < maxsize and minimumseeders < seeds: - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) - - except Exception, e: - logger.error(u"An unknown error occurred in the Mininova Parser: %s" % e) - - - - #attempt to verify that this isn't a substring result - #when looking for "Foo - Foo" we don't want "Foobar" - #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist - if len(resultlist): - resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] - - if len(resultlist): - - # Add a priority if it has any of the preferred words - temp_list = [] - for result in resultlist: - if headphones.PREFERRED_WORDS and any(word.lower() in result[0].lower() for word in helpers.split_string(headphones.PREFERRED_WORDS)): - temp_list.append((result[0],result[1],result[2],result[3],1)) - else: - temp_list.append((result[0],result[1],result[2],result[3],0)) - - resultlist = temp_list - - if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: - - logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE) - - tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid]) - - try: - albumlength = sum([pair[0] for pair in tracks]) - - targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128 - - if not targetsize: - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - torrentlist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - else: - logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) - newlist = [] - flac_list = [] - - if headphones.PREFERRED_BITRATE_HIGH_BUFFER: - high_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_HIGH_BUFFER)/100 - else: - high_size_limit = None - if headphones.PREFERRED_BITRATE_LOW_BUFFER: - low_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_LOW_BUFFER)/100 - else: - low_size_limit = None - - for result in resultlist: - - if high_size_limit and (int(result[1]) > high_size_limit): - logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")") - - # Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches - if 'flac' in result[0].lower(): - flac_list.append((result[0], result[1], result[2], result[3], result[4])) - - continue - - if low_size_limit and (int(result[1]) < low_size_limit): - logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")") - continue - - delta = abs(targetsize - int(result[1])) - newlist.append((result[0], result[1], result[2], result[3], result[4], delta)) - - torrentlist = sorted(newlist, key=lambda title: (-title[4], title[5])) - - if not len(torrentlist) and len(flac_list) and headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS: - logger.info("Since there were no appropriate lossy matches (and at least one lossless match), going to use lossless instead") - torrentlist = sorted(flac_list, key=lambda title: (title[4], int(title[1])), reverse=True) - - except Exception, e: - - logger.debug('Error: %s' % str(e)) - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - - torrentlist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - else: - - torrentlist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - if new: - - while True: - - if len(torrentlist): - - alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [torrentlist[0][2]]) - - if len(alreadydownloaded): - logger.info('%s has already been downloaded. Skipping.' % torrentlist[0][0]) - torrentlist.pop(0) - - else: - break - else: - logger.info('No more results found for %s' % term) - return - - logger.info(u"Pre-processing result") - - (data, bestqual) = preprocesstorrent(torrentlist, pre_sorted_results) - - if data and bestqual: - logger.info(u'Found best result from %s: %s - %s' % (bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1]))) - - torrent_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year) - - # Blackhole - if headphones.TORRENT_DOWNLOADER == 0: - - if bestqual[2].startswith("magnet:"): - logger.error("Cannot save magnet files to blackhole. Please switch your torrent downloader to Transmission or uTorrent") - return - - # Get torrent name from .torrent, this is usually used by the torrent client as the folder name - - torrent_name = torrent_folder_name + '.torrent' - download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name) - try: - if bestqual[3] == 'rutracker.org': - download_path = rutracker.get_torrent(bestqual[2], headphones.TORRENTBLACKHOLE_DIR) - if not download_path: - break - else: - #Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name. - prev = os.umask(headphones.UMASK) - torrent_file = open(download_path, 'wb') - torrent_file.write(data) - torrent_file.close() - os.umask(prev) - - #Open the fresh torrent file again so we can extract the proper torrent name - #Used later in post-processing. - torrent_file = open(download_path, 'rb') - torrent_info = bencode.bdecode(torrent_file.read()) - torrent_file.close() - torrent_folder_name = torrent_info['info'].get('name','').decode('utf-8') - logger.info('Torrent folder name: %s' % torrent_folder_name) - except Exception, e: - logger.error('Couldn\'t get name from Torrent file: %s' % e) - break - - elif headphones.TORRENT_DOWNLOADER == 1: - logger.info("Sending torrent to Transmission") - - # rutracker needs cookies to be set, pass the .torrent file instead of url - if bestqual[3] == 'rutracker.org': - file_or_url = rutracker.get_torrent(bestqual[2]) - else: - file_or_url = bestqual[2] - - torrentid = transmission.addTorrent(file_or_url) - - if not torrentid: - logger.error("Error sending torrent to Transmission. Are you sure it's running?") - return - - torrent_folder_name = transmission.getTorrentFolder(torrentid) - if torrent_folder_name: - logger.info('Torrent folder name: %s' % torrent_folder_name) - else: - logger.error('Torrent folder name could not be determined') - return - - # remove temp .torrent file created above - if bestqual[3] == 'rutracker.org': - try: - shutil.rmtree(os.path.split(file_or_url)[0]) - except Exception, e: - logger.warning('Couldn\'t remove temp dir %s' % e) - - myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]]) - myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name, "torrent"]) - -def preprocesstorrent(resultlist, pre_sorted_list=False): - - # Get out of here if we're using Transmission or uTorrent - if headphones.TORRENT_DOWNLOADER != 0: - return True, resultlist[0] + searchURL = providerurl + category - for result in resultlist: + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from The Pirate Bay: %s' % e) + data = False + + if data: + + logger.info(u'Parsing results from The Pirate Bay' % searchURL) + + soup = BeautifulSoup(data) + table = soup.find('table') + rows = None + if table: + rows = table.findAll('tr') + + if not rows or len(rows) == '1': + logger.info(u"No results found from %s for %s" % (provider, term)) + pass + + else: + for item in rows[1:]: + try: + rightformat = True + title = ''.join(item.find("a", {"class" : "detLink"})) + seeds = int(''.join(item.find("td", {"align" : "right"}))) + url = item.findAll("a")[3]['href'] + if headphones.TORRENT_DOWNLOADER == 0: + tor_hash = re.findall("urn:btih:(.*?)&", url) + if len(tor_hash) > 0: + url = "http://torrage.com/torrent/"+str(tor_hash[0]).upper()+".torrent" + else: + url = None + formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace(u'\xa0', ' ') + size = helpers.piratesize(formatted_size) + if size < maxsize and minimumseeders < seeds and url != None: + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, formatted_size)) + else: + logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds))) + + except Exception, e: + logger.error(u"An unknown error occurred in the Pirate Bay parser: %s" % e) - # get outta here if rutracker or piratebay - if result[3] == 'rutracker.org': - return True, result + if headphones.ISOHUNT: + provider = "isoHunt" + providerurl = url_fix("http://isohunt.com/js/rss/" + term) + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "7" #music + format = "2" #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + categories = "7" #music + format = "10" #mp3+flac + maxsize = 10000000000 + else: + categories = "7" #music + format = "8" #mp3 + maxsize = 300000000 + + params = { + "iht": "2", + "sort": "seeds" + } + searchURL = providerurl + "?%s" % urllib.urlencode(params) try: - request = urllib2.Request(result[2]) - request.add_header('Accept-encoding', 'gzip') - - if result[3] == 'Kick Ass Torrent': - request.add_header('Referer', 'http://kat.ph/') + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False - if result[3] == 'What.cd': - request.add_header('User-Agent', 'Headphones') + if data: + + logger.info(u'Parsing results from isoHunt' % searchURL) + + d = feedparser.parse(data) + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO(response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() else: - torrent = response.read() - except ExpatError: - logger.error('Unable to torrent %s' % result[2]) - continue + for item in d.entries: + try: + rightformat = True + title = re.sub(r"(?<= \[)(.+)(?=\])","",item.title) + title = title.replace("[]","") + sxstart = item.description.find("Seeds: ") + 7 + seeds = "" + while item.description[sxstart:sxstart + 1] != " ": + seeds = seeds + item.description[sxstart:sxstart + 1] + sxstart = sxstart + 1 + url = item.links[1]['url'] + size = int(item.links[1]['length']) + try: + if format == "2": + request = urllib2.Request(url) + request.add_header('Accept-encoding', 'gzip') + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO( response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: + rightformat = False + except Exception, e: + rightformat = False + for findterm in term.split(" "): + if not findterm in title: + rightformat = False + if rightformat == True and size < maxsize and minimumseeders < seeds: + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) + + except Exception, e: + logger.error(u"An unknown error occurred in the isoHunt parser: %s" % e) + + if headphones.MININOVA: + provider = "Mininova" + providerurl = url_fix("http://www.mininova.org/rss/" + term + "/5") + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "7" #music + format = "2" #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + categories = "7" #music + format = "10" #mp3+flac + maxsize = 10000000000 + else: + categories = "7" #music + format = "8" #mp3 + maxsize = 300000000 + + searchURL = providerurl + + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False + + if data: + + logger.info(u'Parsing results from Mininova' % searchURL) + + d = feedparser.parse(data) + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass + + else: + for item in d.entries: + try: + rightformat = True + title = item.title + sxstart = item.description.find("Ratio: ") + 7 + seeds = "" + while item.description[sxstart:sxstart + 1] != " ": + seeds = seeds + item.description[sxstart:sxstart + 1] + sxstart = sxstart + 1 + url = item.links[1]['url'] + size = int(item.links[1]['length']) + try: + if format == "2": + request = urllib2.Request(url) + request.add_header('Accept-encoding', 'gzip') + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO( response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: + rightformat = False + except Exception, e: + rightformat = False + if rightformat == True and size < maxsize and minimumseeders < seeds: + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) + + except Exception, e: + logger.error(u"An unknown error occurred in the Mininova Parser: %s" % e) + + #attempt to verify that this isn't a substring result + #when looking for "Foo - Foo" we don't want "Foobar" + #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist + if len(resultlist): + resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] + if len(resultlist): + return resultlist + + +# THIS IS KIND OF A MESS AND PROBABLY NEEDS TO BE CLEANED UP +def preprocess(resultlist): + + for result in resultlist: + + if result[4] == 'torrent': + #Get out of here if we're using Transmission or uTorrent + if headphones.TORRENT_DOWNLOADER != 0: + return True, result + # get outta here if rutracker or piratebay + if result[3] == 'rutracker.org': + return True, result + + try: + request = urllib2.Request(result[2]) + request.add_header('Accept-encoding', 'gzip') + if result[3] == 'Kick Ass Torrent': + request.add_header('Referer', 'http://kat.ph/') + + if result[3] == 'What.cd': + request.add_header('User-Agent', 'Headphones') + + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO(response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + except ExpatError: + logger.error('Unable to torrent %s' % result[2]) + continue + + return torrent, result + + else: + if not headphones.USENET_RETENTION: + usenet_retention = 2000 + else: + usenet_retention = int(headphones.USENET_RETENTION) + + nzb = getresultNZB(result) + if nzb: + try: + d = minidom.parseString(nzb) + node = d.documentElement + nzbfiles = d.getElementsByTagName("file") + skipping = False + for nzbfile in nzbfiles: + if int(nzbfile.getAttribute("date")) < (time.time() - usenet_retention * 86400): + logger.info('NZB contains a file out of your retention. Skipping.') + skipping = True + break + if skipping: + continue + + #TODO: Do we want rar checking in here to try to keep unknowns out? + #or at least the option to do so? + except Exception, e: + logger.error('Unable to parse the best result NZB. Error: ' + str(e) + '. (Make sure your username/password/API is correct for provider: ' + result[3]) + continue + + return nzb, result + else: + logger.error("Couldn't retrieve the best nzb. Skipping.") + continue + + return (None, None) + - return torrent, result