From 269f346e1c2fa7ac2ed492f892a9acdb562e47bb Mon Sep 17 00:00:00 2001 From: rembo10 Date: Tue, 1 Apr 2014 22:39:30 -0700 Subject: [PATCH 1/6] Updated musicbrainz lib --- headphones/postprocessor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index 2a06aae1..0437781b 100644 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -757,15 +757,15 @@ def correctMetadata(albumid, release, downloaded_track_list): except Exception, e: logger.error('Error getting recommendation: %s. Not writing metadata' % e) return - if rec == 'RECOMMEND_NONE': + if rec == 'recommendation.none': logger.warn('No accurate album match found for %s, %s - not writing metadata' % (release['ArtistName'], release['AlbumTitle'])) return - if candidates: - dist, info, mapping, extra_items, extra_tracks = candidates[0] - else: - logger.warn('No accurate album match found for %s, %s - not writing metadata' % (release['ArtistName'], release['AlbumTitle'])) - return + #if candidates: + dist, info, mapping, extra_items, extra_tracks = candidates[0] + #else: + # logger.warn('No accurate album match found for %s, %s - not writing metadata' % (release['ArtistName'], release['AlbumTitle'])) + # return logger.info('Beets recommendation for tagging items: %s' % rec) From acad25c534f98878d95c814ea46ee5f9346006b7 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Tue, 1 Apr 2014 17:29:13 -0700 Subject: [PATCH 2/6] Initial changes to get 'prefer torrents' option working --- data/interfaces/default/config.html | 6 ++++++ headphones/__init__.py | 5 ++++- headphones/webserve.py | 6 +++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index 3f9115a5..1cd0f7de 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -250,6 +250,12 @@ +
+ + NZBs + Torrents + No Preference +
diff --git a/headphones/__init__.py b/headphones/__init__.py index b0bec3d6..edd377c0 100644 --- a/headphones/__init__.py +++ b/headphones/__init__.py @@ -126,6 +126,7 @@ EXTRAS = None AUTOWANT_UPCOMING = False AUTOWANT_ALL = False KEEP_TORRENT_FILES = False +PREFER_TORRENTS = None # 0: nzbs, 1: torrents, 2: no preference SEARCH_INTERVAL = 360 LIBRARYSCAN = False @@ -335,7 +336,7 @@ def initialize(): HTTP_PORT, HTTP_HOST, HTTP_USERNAME, HTTP_PASSWORD, HTTP_ROOT, HTTP_PROXY, LAUNCH_BROWSER, API_ENABLED, API_KEY, GIT_PATH, GIT_USER, GIT_BRANCH, DO_NOT_OVERRIDE_GIT_BRANCH, \ CURRENT_VERSION, LATEST_VERSION, CHECK_GITHUB, CHECK_GITHUB_ON_STARTUP, CHECK_GITHUB_INTERVAL, MUSIC_DIR, DESTINATION_DIR, \ LOSSLESS_DESTINATION_DIR, PREFERRED_QUALITY, PREFERRED_BITRATE, DETECT_BITRATE, ADD_ARTISTS, CORRECT_METADATA, MOVE_FILES, \ - RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, FILE_UNDERSCORES, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, KEEP_TORRENT_FILES, \ + RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, FILE_UNDERSCORES, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, KEEP_TORRENT_FILES, PREFER_TORRENTS, \ ADD_ALBUM_ART, ALBUM_ART_FORMAT, EMBED_ALBUM_ART, EMBED_LYRICS, DOWNLOAD_DIR, BLACKHOLE, BLACKHOLE_DIR, USENET_RETENTION, SEARCH_INTERVAL, \ TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, PIRATEBAY, PIRATEBAY_PROXY_URL, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, \ RUTRACKER, RUTRACKER_USER, RUTRACKER_PASSWORD, WHATCD, WHATCD_USERNAME, WHATCD_PASSWORD, DOWNLOAD_TORRENT_DIR, \ @@ -451,6 +452,7 @@ def initialize(): AUTOWANT_UPCOMING = bool(check_setting_int(CFG, 'General', 'autowant_upcoming', 1)) AUTOWANT_ALL = bool(check_setting_int(CFG, 'General', 'autowant_all', 0)) KEEP_TORRENT_FILES = bool(check_setting_int(CFG, 'General', 'keep_torrent_files', 0)) + PREFER_TORRENTS = check_setting_int(CFG, 'General', 'prefer_torrents', 0) SEARCH_INTERVAL = check_setting_int(CFG, 'General', 'search_interval', 1440) LIBRARYSCAN = bool(check_setting_int(CFG, 'General', 'libraryscan', 1)) @@ -857,6 +859,7 @@ def config_write(): new_config['General']['autowant_upcoming'] = int(AUTOWANT_UPCOMING) new_config['General']['autowant_all'] = int(AUTOWANT_ALL) new_config['General']['keep_torrent_files'] = int(KEEP_TORRENT_FILES) + new_config['General']['prefer_torrents'] = PREFER_TORRENTS new_config['General']['numberofseeders'] = NUMBEROFSEEDERS new_config['General']['torrentblackhole_dir'] = TORRENTBLACKHOLE_DIR diff --git a/headphones/webserve.py b/headphones/webserve.py index 83bf3513..b21f10d7 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -948,6 +948,9 @@ class WebInterface(object): "autowant_upcoming" : checked(headphones.AUTOWANT_UPCOMING), "autowant_all" : checked(headphones.AUTOWANT_ALL), "keep_torrent_files" : checked(headphones.KEEP_TORRENT_FILES), + "prefer_torrents_0" : radio(headphones.PREFER_TORRENTS, 0), + "prefer_torrents_1" : radio(headphones.PREFER_TORRENTS, 1), + "prefer_torrents_2" : radio(headphones.PREFER_TORRENTS, 2), "log_dir" : headphones.LOG_DIR, "cache_dir" : headphones.CACHE_DIR, "interface_list" : interface_list, @@ -1047,7 +1050,7 @@ class WebInterface(object): numberofseeders=None, use_piratebay=0, piratebay_proxy_url=None, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, whatcd=0, whatcd_username=None, whatcd_password=None, rutracker=0, rutracker_user=None, rutracker_password=None, rename_files=0, correct_metadata=0, cleanup_files=0, add_album_art=0, album_art_format=None, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None, folder_format=None, file_format=None, file_underscores=0, include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, - remix=0, spokenword=0, audiobook=0, autowant_upcoming=False, autowant_all=False, keep_torrent_files=False, interface=None, log_dir=None, cache_dir=None, music_encoder=0, encoder=None, xldprofile=None, + remix=0, spokenword=0, audiobook=0, autowant_upcoming=False, autowant_all=False, keep_torrent_files=False, prefer_torrents=0, interface=None, log_dir=None, cache_dir=None, music_encoder=0, encoder=None, xldprofile=None, bitrate=None, samplingfrequency=None, encoderfolder=None, advancedencoder=None, encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0, delete_lossless_files=0, growl_enabled=0, growl_onsnatch=0, growl_host=None, growl_password=None, prowl_enabled=0, prowl_onsnatch=0, prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None, xbmc_update=0, xbmc_notify=0, nma_enabled=False, nma_apikey=None, nma_priority=0, nma_onsnatch=0, pushalot_enabled=False, pushalot_apikey=None, pushalot_onsnatch=0, synoindex_enabled=False, @@ -1148,6 +1151,7 @@ class WebInterface(object): headphones.AUTOWANT_UPCOMING = autowant_upcoming headphones.AUTOWANT_ALL = autowant_all headphones.KEEP_TORRENT_FILES = keep_torrent_files + headphones.PREFER_TORRENTS = int(prefer_torrents) headphones.INTERFACE = interface headphones.LOG_DIR = log_dir headphones.CACHE_DIR = cache_dir From 864ea22a7005edd72218d4212db1b6203bde089a Mon Sep 17 00:00:00 2001 From: rembo10 Date: Tue, 1 Apr 2014 20:34:51 -0700 Subject: [PATCH 3/6] Pulled in a few beets updates --- lib/beets/autotag/__init__.py | 2 +- lib/beets/autotag/hooks.py | 2 +- lib/beets/autotag/match.py | 2 +- lib/beets/library.py | 2 +- lib/beets/ui/commands.py | 21 ++++++++++++++++----- 5 files changed, 20 insertions(+), 9 deletions(-) diff --git a/lib/beets/autotag/__init__.py b/lib/beets/autotag/__init__.py index aaec990c..a3696354 100644 --- a/lib/beets/autotag/__init__.py +++ b/lib/beets/autotag/__init__.py @@ -242,6 +242,6 @@ def apply_metadata(album_info, mapping): item[field] = value if track_info.disctitle is not None: item.disctitle = track_info.disctitle - + # Headphones seal of approval item.comments = 'tagged by headphones/beets' diff --git a/lib/beets/autotag/hooks.py b/lib/beets/autotag/hooks.py index 5ba841bf..74c8cf82 100644 --- a/lib/beets/autotag/hooks.py +++ b/lib/beets/autotag/hooks.py @@ -21,7 +21,7 @@ from beets import plugins from beets import config from beets.autotag import mb from beets.util import levenshtein -from lib.unidecode import unidecode +from unidecode import unidecode log = logging.getLogger('beets') diff --git a/lib/beets/autotag/match.py b/lib/beets/autotag/match.py index 082a8c2f..a4bc47fa 100644 --- a/lib/beets/autotag/match.py +++ b/lib/beets/autotag/match.py @@ -20,7 +20,7 @@ from __future__ import division import datetime import logging import re -from lib.munkres import Munkres +from munkres import Munkres from beets import plugins from beets import config diff --git a/lib/beets/library.py b/lib/beets/library.py index 9102aff7..94559430 100644 --- a/lib/beets/library.py +++ b/lib/beets/library.py @@ -21,7 +21,7 @@ import logging import shlex import unicodedata import time -from lib.unidecode import unidecode +from unidecode import unidecode from beets.mediafile import MediaFile, MutagenError from beets import plugins from beets import util diff --git a/lib/beets/ui/commands.py b/lib/beets/ui/commands.py index d0ff320a..e7e631a4 100644 --- a/lib/beets/ui/commands.py +++ b/lib/beets/ui/commands.py @@ -1082,7 +1082,7 @@ default_commands.append(version_cmd) # modify: Declaratively change metadata. -def modify_items(lib, mods, query, write, move, album, confirm): +def modify_items(lib, mods, dels, query, write, move, album, confirm): """Modifies matching items according to key=value assignments.""" # Parse key=value specifications into a dictionary. model_cls = library.Album if album else library.Item @@ -1102,6 +1102,8 @@ def modify_items(lib, mods, query, write, move, album, confirm): for obj in objs: for field, value in fsets.iteritems(): obj[field] = value + for field in dels: + del obj[field] if ui.show_model_changes(obj): changed.add(obj) @@ -1155,13 +1157,22 @@ modify_cmd.parser.add_option('-f', '--format', action='store', help='print with custom format', default=None) def modify_func(lib, opts, args): args = decargs(args) - mods = [a for a in args if '=' in a] - query = [a for a in args if '=' not in a] - if not mods: + mods = [] + dels = [] + query = [] + for arg in args: + if arg.endswith('!') and '=' not in arg and ':' not in arg: + dels.append(arg[:-1]) + elif '=' in arg: + mods.append(arg) + else: + query.append(arg) + if not mods and not dels: raise ui.UserError('no modifications specified') write = opts.write if opts.write is not None else \ config['import']['write'].get(bool) - modify_items(lib, mods, query, write, opts.move, opts.album, not opts.yes) + modify_items(lib, mods, dels, query, write, opts.move, opts.album, + not opts.yes) modify_cmd.func = modify_func default_commands.append(modify_cmd) From 1cff59fdaf2b708353e9f46c050d15f46935fb00 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Tue, 1 Apr 2014 23:14:48 -0700 Subject: [PATCH 4/6] Reverted a couple changes for bug testing --- headphones/postprocessor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index 0437781b..04c1c526 100644 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -761,11 +761,11 @@ def correctMetadata(albumid, release, downloaded_track_list): logger.warn('No accurate album match found for %s, %s - not writing metadata' % (release['ArtistName'], release['AlbumTitle'])) return - #if candidates: - dist, info, mapping, extra_items, extra_tracks = candidates[0] - #else: - # logger.warn('No accurate album match found for %s, %s - not writing metadata' % (release['ArtistName'], release['AlbumTitle'])) - # return + if candidates: + dist, info, mapping, extra_items, extra_tracks = candidates[0] + else: + logger.warn('No accurate album match found for %s, %s - not writing metadata' % (release['ArtistName'], release['AlbumTitle'])) + return logger.info('Beets recommendation for tagging items: %s' % rec) From 6279a95b6c1104dc55d8ef1753d1413374105cd0 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Thu, 3 Apr 2014 19:29:12 -0700 Subject: [PATCH 5/6] Heavily modified searcher.py in order to allow searching for torrents over nzbs & combining results to pick the best of both --- headphones/searcher.py | 2459 +++++++++++++++++++--------------------- 1 file changed, 1187 insertions(+), 1272 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index 89214b81..cd03cf48 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -108,566 +108,659 @@ def patch_http_response_read(func): httplib.HTTPResponse.read = patch_http_response_read(httplib.HTTPResponse.read) -def searchforalbum(albumid=None, new=False, lossless=False): +def searchforalbum(albumid=None, new=False, losslessOnly=False): + myDB = db.DBConnection() + if not albumid: - myDB = db.DBConnection() - - results = myDB.select('SELECT AlbumID, AlbumTitle, ArtistName, Status from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - new = True + results = myDB.select('SELECT * from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - for result in results: - foundNZB = "none" - if not result['AlbumTitle'] or not result['ArtistName']: - logger.warn('Skipping release %s. No title available' % result['AlbumID']) - else: - if (headphones.HEADPHONES_INDEXER or headphones.NEWZNAB or headphones.NZBSORG or headphones.NZBSRUS or headphones.OMGWTFNZBS) and (headphones.SAB_HOST or headphones.BLACKHOLE_DIR or headphones.NZBGET_HOST): - if result['Status'] == "Wanted Lossless": - foundNZB = searchNZB(result['AlbumID'], new, losslessOnly=True) - else: - foundNZB = searchNZB(result['AlbumID'], new) - - if (headphones.KAT or headphones.PIRATEBAY or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER or headphones.WHATCD) and foundNZB == "none": - - if result['Status'] == "Wanted Lossless": - searchTorrent(result['AlbumID'], new, losslessOnly=True) - else: - searchTorrent(result['AlbumID'], new) + for album in results: + + if not album['AlbumTitle'] or not album['ArtistName']: + logger.warn('Skipping release %s. No title available' % album['AlbumID']) + continue + + new = True + + if album['Status'] == "Wanted Lossless": + losslessOnly = True + + do_sorted_search(album, new, losslessOnly) + + else: + logger.info("Got to zero") + album = myDB.action('SELECT * from albums WHERE AlbumID=?', [albumid]).fetchone() + logger.info('Searching for %s' % album['AlbumTitle']) + do_sorted_search(album, new, losslessOnly) + + logger.info('Search for Wanted albums complete') + +def do_sorted_search(album, new, losslessOnly): + + NZB_PROVIDERS = (headphones.HEADPHONES_INDEXER or headphones.NEWZNAB or headphones.NZBSORG or headphones.NZBSRUS or headphones.OMGWTFNZBS) + NZB_DOWNLOADERS = (headphones.SAB_HOST or headphones.BLACKHOLE_DIR or headphones.NZBGET_HOST) + TORRENT_PROVIDERS = (headphones.KAT or headphones.PIRATEBAY or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER or headphones.WHATCD) + + results = [] + + if headphones.PREFER_TORRENTS == 0: + + if NZB_PROVIDERS and NZB_DOWNLOADERS: + results = searchNZB(album, new, losslessOnly) + + if not results and TORRENT_PROVIDERS: + results = searchTorrent(album, new, losslessOnly) + + elif headphones.PREFER_TORRENTS == 1: + + if TORRENT_PROVIDERS: + results = searchTorrent(album, new, losslessOnly) + + if not results and NZB_PROVIDERS and NZB_DOWNLOADERS: + results = searchNZB(album, new, losslessOnly) else: - foundNZB = "none" - - if (headphones.HEADPHONES_INDEXER or headphones.NEWZNAB or headphones.NZBSORG or headphones.NZBSRUS or headphones.OMGWTFNZBS) and (headphones.SAB_HOST or headphones.BLACKHOLE_DIR or headphones.NZBGET_HOST): - foundNZB = searchNZB(albumid, new, lossless) + if NZB_PROVIDERS and NZB_DOWNLOADERS: + nzb_results = searchNZB(album, new, losslessOnly) - if (headphones.KAT or headphones.PIRATEBAY or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER or headphones.WHATCD) and foundNZB == "none": - searchTorrent(albumid, new, lossless) + if TORRENT_PROVIDERS: + torrent_results = searchTorrent(album, new, losslessOnly) - logger.info('Search for Wanted albums complete') - -def searchNZB(albumid=None, new=False, losslessOnly=False): + results = nzb_results + torrent_results + + sorted_search_results = sort_search_results(results, album, new) + logger.info(u"Making sure we can download the best result") + (data, bestqual) = preprocess(sorted_search_results) + + if data and bestqual: + send_to_downloader(data, bestqual, album) + +def sort_search_results(resultlist, album, new): myDB = db.DBConnection() - if albumid: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, Type, SearchTerm from albums WHERE AlbumID=?', [albumid]) - else: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, Type, SearchTerm from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - new = True + # Add a priority if it has any of the preferred words + temp_list = [] + for result in resultlist: + if headphones.PREFERRED_WORDS and any(word.lower() in result[0].lower() for word in helpers.split_string(headphones.PREFERRED_WORDS)): + temp_list.append((result[0],result[1],result[2],result[3],result[4],1)) + else: + temp_list.append((result[0],result[1],result[2],result[3],result[4],0)) - for albums in results: + resultlist = temp_list - albumid = albums[2] - reldate = albums[3] + if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: + + logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE) + + tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [album['AlbumID']]) try: - year = reldate[:4] - except TypeError: - year = '' + albumlength = sum([pair[0] for pair in tracks]) - dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '.':'', ':':''} + targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128 - cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic)).strip() - cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic)).strip() + if not targetsize: + logger.info('No track information for %s - %s. Defaulting to highest quality' % (album['ArtistName'], album['AlbumTitle'])) + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) - # Use the provided search term if available, otherwise build a search term - if albums[5]: - term = albums[5] + else: + logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) + newlist = [] + flac_list = [] + if headphones.PREFERRED_BITRATE_HIGH_BUFFER: + high_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_HIGH_BUFFER)/100 + else: + high_size_limit = None + if headphones.PREFERRED_BITRATE_LOW_BUFFER: + low_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_LOW_BUFFER)/100 + else: + low_size_limit = None + + for result in resultlist: + + if high_size_limit and (int(result[1]) > high_size_limit): + + logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")") + + # Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches + if 'flac' in result[0].lower(): + flac_list.append((result[0], result[1], result[2], result[3], result[4], result[5])) + + continue + + if low_size_limit and (int(result[1]) < low_size_limit): + logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")") + continue + + delta = abs(targetsize - int(result[1])) + newlist.append((result[0], result[1], result[2], result[3], result[4], result[5], delta)) + + finallist = sorted(newlist, key=lambda title: (-title[5], title[6])) + + if not len(finallist) and len(flac_list) and headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS: + logger.info("Since there were no appropriate lossy matches (and at least one lossless match, going to use lossless instead") + finallist = sorted(flac_list, key=lambda title: (title[5], int(title[1])), reverse=True) + + except Exception, e: + + logger.debug('Error: %s' % str(e)) + logger.info('No track information for %s - %s. Defaulting to highest quality' % (album['ArtistName'], album['AlbumTitle'])) + + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) + + else: + + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) + + if new: + + while True: + + if len(finallist): + + alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [finallist[0][2]]) + + if len(alreadydownloaded): + logger.info('%s has already been downloaded. Skipping.' % finallist[0][0]) + finallist.pop(0) + + else: + break + else: + logger.info('No more results found for %s' % term) + return None + + if not len(finallist): + logger.info('No appropriate matches found for %s' % term) + return None + + return finallist + +def get_year_from_release_date(release_date): + + try: + year = release_date[:4] + except TypeError: + year = '' + + return year + +def searchNZB(album, new=False, losslessOnly=False): + + albumid = album['AlbumID'] + reldate = album['ReleaseDate'] + + year = get_year_from_release_date(reldate) + + dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '.':'', ':':''} + + cleanalbum = helpers.latinToAscii(helpers.replace_all(album['AlbumTitle'], dic)).strip() + cleanartist = helpers.latinToAscii(helpers.replace_all(album['ArtistName'], dic)).strip() + + # Use the provided search term if available, otherwise build a search term + if album['SearchTerm']: + term = album['SearchTerm'] + + else: + # FLAC usually doesn't have a year for some reason so I'll leave it out + # Various Artist albums might be listed as VA, so I'll leave that out too + # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums + if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4: + term = cleanartist + ' ' + cleanalbum + ' ' + year + elif album['ArtistName'] == 'Various Artists': + term = cleanalbum + ' ' + year else: - # FLAC usually doesn't have a year for some reason so I'll leave it out - # Various Artist albums might be listed as VA, so I'll leave that out too - # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums - if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4: - term = cleanartist + ' ' + cleanalbum + ' ' + year - elif albums[0] == 'Various Artists': - term = cleanalbum + ' ' + year - else: - term = cleanartist + ' ' + cleanalbum + term = cleanartist + ' ' + cleanalbum - # Replace bad characters in the term and unicode it - term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') + # Replace bad characters in the term and unicode it + term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') - artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8') + artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8') - logger.info("Searching for %s since it was marked as wanted" % term) + logger.info("Searching for %s since it was marked as wanted" % term) - resultlist = [] + resultlist = [] - if headphones.HEADPHONES_INDEXER: + if headphones.HEADPHONES_INDEXER: - provider = "headphones" - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "3040" - elif headphones.PREFERRED_QUALITY: - categories = "3040,3010" - else: - categories = "3010" + provider = "headphones" + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "3040" + elif headphones.PREFERRED_QUALITY: + categories = "3040,3010" + else: + categories = "3010" - if albums['Type'] == 'Other': - categories = "3030" - logger.info("Album type is audiobook/spokenword. Using audiobook category") + if album['Type'] == 'Other': + categories = "3030" + logger.info("Album type is audiobook/spokenword. Using audiobook category") - params = { "t": "search", - "cat": categories, - "apikey": '89edf227c1de9b3de50383fff11466c6', - "maxage": headphones.USENET_RETENTION, - "q": term - } + params = { "t": "search", + "cat": categories, + "apikey": '89edf227c1de9b3de50383fff11466c6', + "maxage": headphones.USENET_RETENTION, + "q": term + } - searchURL = 'http://headphones.codeshy.com/newznab/api?' + urllib.urlencode(params) + searchURL = 'http://headphones.codeshy.com/newznab/api?' + urllib.urlencode(params) - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - base64string = base64.encodestring('%s:%s' % (headphones.HPUSER, headphones.HPPASS)).replace('\n', '') - request.add_header("Authorization", "Basic %s" % base64string) - - opener = urllib2.build_opener() + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + base64string = base64.encodestring('%s:%s' % (headphones.HPUSER, headphones.HPPASS)).replace('\n', '') + request.add_header("Authorization", "Basic %s" % base64string) + + opener = urllib2.build_opener() - logger.info(u'Parsing results from %s' % (searchURL, 'Headphones Index')) + logger.info(u'Parsing results from %s' % (searchURL, 'Headphones Index')) - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from %s: %s' % ('Headphones Index', e)) - data = False + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from %s: %s' % ('Headphones Index', e)) + data = False - if data: + if data: - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from %s for %s" % ('Headphones Index', term)) - pass - - else: - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - if headphones.NEWZNAB: - - newznab_hosts = [] - - for newznab_host in headphones.EXTRA_NEWZNABS: - if newznab_host[2] == '1' or newznab_host[2] == 1: - newznab_hosts.append(newznab_host) - - provider = "newznab" - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "3040" - elif headphones.PREFERRED_QUALITY: - categories = "3040,3010" - else: - categories = "3010" - - if albums['Type'] == 'Other': - categories = "3030" - logger.info("Album type is audiobook/spokenword. Using audiobook category") - - for newznab_host in newznab_hosts: - - # Add a little mod for kere.ws - if newznab_host[0] == "http://kere.ws": - if categories == "3040": - categories = categories + ",4070" - elif categories == "3040,3010": - categories = categories + ",4070,4010" - elif categories == "3010": - categories = categories + ",4010" - else: - categories = categories + ",4050" - - params = { "t": "search", - "apikey": newznab_host[1], - "cat": categories, - "maxage": headphones.USENET_RETENTION, - "q": term - } - - searchURL = newznab_host[0] + '/api?' + urllib.urlencode(params) - - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - opener = urllib2.build_opener() - - logger.info(u'Parsing results from %s' % (searchURL, newznab_host[0])) - - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from %s: %s' % (newznab_host[0], e)) - data = False - - if data: - - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (newznab_host[0], term)) - pass - - else: - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - if headphones.NZBSORG: - provider = "nzbsorg" - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "3040" - elif headphones.PREFERRED_QUALITY: - categories = "3040,3010" - else: - categories = "3010" - - if albums['Type'] == 'Other': - categories = "3030" - logger.info("Album type is audiobook/spokenword. Using audiobook category") - - params = { "t": "search", - "apikey": headphones.NZBSORG_HASH, - "cat": categories, - "maxage": headphones.USENET_RETENTION, - "q": term - } - - searchURL = 'http://beta.nzbs.org/api?' + urllib.urlencode(params) - - logger.info(u'Parsing results from nzbs.org' % searchURL) - - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from nzbs.org: %s' % e) - data = False - - if data: - - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from nzbs.org for %s" % term) - pass - - else: - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - if headphones.NZBSRUS: - - provider = "nzbsrus" - categories = "54" - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - sub = "16" - elif headphones.PREFERRED_QUALITY: - sub = "" - else: - sub = "15" - - if albums['Type'] == 'Other': - sub = "" - logger.info("Album type is audiobook/spokenword. Searching all music categories") - - params = { "uid": headphones.NZBSRUS_UID, - "key": headphones.NZBSRUS_APIKEY, - "cat": categories, - "sub": sub, - "age": headphones.USENET_RETENTION, - "searchtext": term - } - - searchURL = 'https://www.nzbsrus.com/api.php?' + urllib.urlencode(params) - - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - opener = urllib2.build_opener() - - logger.info(u'Parsing results from NZBsRus' % searchURL) - - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from NZBsRus: %s' % e) - data = False - - if data: - - d = json.loads(data) - - if d['matches'] <= 0: - logger.info(u"No results found from NZBsRus for %s" % term) - pass - - else: - for item in d['results']: - try: - url = "http://www.nzbsrus.com/nzbdownload_rss.php/" + item['id'] + "/" + headphones.NZBSRUS_UID + "/" + item['key'] - title = item['name'] - size = int(item['size']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) - - - if headphones.OMGWTFNZBS: - - provider = "omgwtfnzbs" - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "22" - elif headphones.PREFERRED_QUALITY: - categories = "22,7" - else: - categories = "7" - - if albums['Type'] == 'Other': - categories = "29" - logger.info("Album type is audiobook/spokenword. Searching all music categories") - - params = { "user": headphones.OMGWTFNZBS_UID, - "api": headphones.OMGWTFNZBS_APIKEY, - "catid": categories, - "retention": headphones.USENET_RETENTION, - "search": term - } - - searchURL = 'http://api.omgwtfnzbs.org/json/?' + urllib.urlencode(params) - - # Add a user-agent - request = urllib2.Request(searchURL) - request.add_header('User-Agent', USER_AGENT) - opener = urllib2.build_opener() - - logger.info(u'Parsing results from omgwtfnzbs' % searchURL) - - try: - data = opener.open(request).read() - except Exception, e: - logger.warn('Error fetching data from omgwtfnzbs: %s' % e) - data = False - - if data: - - d = json.loads(data) - - if 'notice' in data: - logger.info(u"No results returned from omgwtfnzbs: %s" % d['notice']) - pass - - else: - for item in d: - try: - url = item['getnzb'] - title = item['release'] - size = int(item['sizebytes']) - - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - - except Exception, e: - logger.error(u"An unknown error occurred trying to parse the results: %s" % e) - - # attempt to verify that this isn't a substring result - # when looking for "Foo - Foo" we don't want "Foobar" - # this should be less of an issue when it isn't a self-titled album so we'll only check vs artist - # - # Also will filter flac & remix albums if not specifically looking for it - # This code also checks the ignored words and required words - - if len(resultlist): - resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] - - if len(resultlist): - - # Add a priority if it has any of the preferred words - temp_list = [] - for result in resultlist: - if headphones.PREFERRED_WORDS and any(word.lower() in result[0].lower() for word in helpers.split_string(headphones.PREFERRED_WORDS)): - temp_list.append((result[0],result[1],result[2],result[3],1)) - else: - temp_list.append((result[0],result[1],result[2],result[3],0)) - - resultlist = temp_list - - if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: - - logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE) - - tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid]) - - try: - albumlength = sum([pair[0] for pair in tracks]) - - targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128 - - if not targetsize: - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - nzblist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - else: - logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) - newlist = [] - flac_list = [] - - if headphones.PREFERRED_BITRATE_HIGH_BUFFER: - high_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_HIGH_BUFFER)/100 - else: - high_size_limit = None - if headphones.PREFERRED_BITRATE_LOW_BUFFER: - low_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_LOW_BUFFER)/100 - else: - low_size_limit = None - - for result in resultlist: - - if high_size_limit and (int(result[1]) > high_size_limit): - - logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")") - - # Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches - if 'flac' in result[0].lower(): - flac_list.append((result[0], result[1], result[2], result[3], result[4])) - - continue - - if low_size_limit and (int(result[1]) < low_size_limit): - logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")") - continue - - delta = abs(targetsize - int(result[1])) - newlist.append((result[0], result[1], result[2], result[3], result[4], delta)) - - nzblist = sorted(newlist, key=lambda title: (-title[4], title[5])) - - if not len(nzblist) and len(flac_list) and headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS: - logger.info("Since there were no appropriate lossy matches (and at least one lossless match), going to use lossless instead") - nzblist = sorted(flac_list, key=lambda title: (title[4], int(title[1])), reverse=True) - - except Exception, e: - - logger.debug('Error: %s' % str(e)) - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - - nzblist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) + d = feedparser.parse(data) + if not len(d.entries): + logger.info(u"No results found from %s for %s" % ('Headphones Index', term)) + pass else: - - nzblist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - - - if new: - - while True: - - if len(nzblist): - - alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [nzblist[0][2]]) - - if len(alreadydownloaded): - logger.info('%s has already been downloaded. Skipping.' % nzblist[0][0]) - nzblist.pop(0) - - else: - break - else: - logger.info('No more results found for %s' % term) - return "none" - - if not len(nzblist): - logger.info('No appropriate matches found for %s' % term) - return "none" - - logger.info(u"Pre-processing result") - - (data, bestqual) = preprocess(nzblist) - - if data and bestqual: - logger.info(u'Found best result: %s - %s' % (bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1]))) - # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads - nzb_folder_name = helpers.sab_sanitize_foldername(bestqual[0]) - if headphones.NZB_DOWNLOADER == 1: - - nzb = classes.NZBDataSearchResult() - nzb.extraInfo.append(data) - nzb.name = nzb_folder_name - nzbget.sendNZB(nzb) - - elif headphones.NZB_DOWNLOADER == 0: - - nzb = classes.NZBDataSearchResult() - nzb.extraInfo.append(data) - nzb.name = nzb_folder_name - sab.sendNZB(nzb) - - # If we sent the file to sab, we can check how it was renamed and insert that into the snatched table - (replace_spaces, replace_dots) = sab.checkConfig() - - if replace_dots: - nzb_folder_name = helpers.sab_replace_dots(nzb_folder_name) - if replace_spaces: - nzb_folder_name = helpers.sab_replace_spaces(nzb_folder_name) - - else: - - nzb_name = nzb_folder_name + '.nzb' - download_path = os.path.join(headphones.BLACKHOLE_DIR, nzb_name) + for item in d.entries: try: - prev = os.umask(headphones.UMASK) - f = open(download_path, 'w') - f.write(data) - f.close() - os.umask(prev) - logger.info('File saved to: %s' % nzb_name) + url = item.link + title = item.title + size = int(item.links[1]['length']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + except Exception, e: - logger.error('Couldn\'t write NZB file: %s' % e) - break + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + + if headphones.NEWZNAB: - myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]]) - myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", nzb_folder_name, "nzb"]) - return "found" - else: - return "none" + newznab_hosts = [] + + for newznab_host in headphones.EXTRA_NEWZNABS: + if newznab_host[2] == '1' or newznab_host[2] == 1: + newznab_hosts.append(newznab_host) + + provider = "newznab" + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "3040" + elif headphones.PREFERRED_QUALITY: + categories = "3040,3010" else: - return "none" + categories = "3010" + + if album['Type'] == 'Other': + categories = "3030" + logger.info("Album type is audiobook/spokenword. Using audiobook category") + + for newznab_host in newznab_hosts: + + # Add a little mod for kere.ws + if newznab_host[0] == "http://kere.ws": + if categories == "3040": + categories = categories + ",4070" + elif categories == "3040,3010": + categories = categories + ",4070,4010" + elif categories == "3010": + categories = categories + ",4010" + else: + categories = categories + ",4050" + + params = { "t": "search", + "apikey": newznab_host[1], + "cat": categories, + "maxage": headphones.USENET_RETENTION, + "q": term + } + + searchURL = newznab_host[0] + '/api?' + urllib.urlencode(params) + + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + opener = urllib2.build_opener() + + logger.info(u'Parsing results from %s' % (searchURL, newznab_host[0])) + + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from %s: %s' % (newznab_host[0], e)) + data = False + + if data: + + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (newznab_host[0], term)) + pass + + else: + for item in d.entries: + try: + url = item.link + title = item.title + size = int(item.links[1]['length']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + + if headphones.NZBSORG: + provider = "nzbsorg" + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "3040" + elif headphones.PREFERRED_QUALITY: + categories = "3040,3010" + else: + categories = "3010" + + if album['Type'] == 'Other': + categories = "3030" + logger.info("Album type is audiobook/spokenword. Using audiobook category") + + params = { "t": "search", + "apikey": headphones.NZBSORG_HASH, + "cat": categories, + "maxage": headphones.USENET_RETENTION, + "q": term + } + + searchURL = 'http://beta.nzbs.org/api?' + urllib.urlencode(params) + + logger.info(u'Parsing results from nzbs.org' % searchURL) + + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from nzbs.org: %s' % e) + data = False + + if data: + + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from nzbs.org for %s" % term) + pass + + else: + for item in d.entries: + try: + url = item.link + title = item.title + size = int(item.links[1]['length']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + + if headphones.NZBSRUS: + + provider = "nzbsrus" + categories = "54" + + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + sub = "16" + elif headphones.PREFERRED_QUALITY: + sub = "" + else: + sub = "15" + + if album['Type'] == 'Other': + sub = "" + logger.info("Album type is audiobook/spokenword. Searching all music categories") + + params = { "uid": headphones.NZBSRUS_UID, + "key": headphones.NZBSRUS_APIKEY, + "cat": categories, + "sub": sub, + "age": headphones.USENET_RETENTION, + "searchtext": term + } + + searchURL = 'https://www.nzbsrus.com/api.php?' + urllib.urlencode(params) + + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + opener = urllib2.build_opener() + + logger.info(u'Parsing results from NZBsRus' % searchURL) + + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from NZBsRus: %s' % e) + data = False + + if data: + + d = json.loads(data) + + if d['matches'] <= 0: + logger.info(u"No results found from NZBsRus for %s" % term) + pass + + else: + for item in d['results']: + try: + url = "http://www.nzbsrus.com/nzbdownload_rss.php/" + item['id'] + "/" + headphones.NZBSRUS_UID + "/" + item['key'] + title = item['name'] + size = int(item['size']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + if headphones.OMGWTFNZBS: + + provider = "omgwtfnzbs" + + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "22" + elif headphones.PREFERRED_QUALITY: + categories = "22,7" + else: + categories = "7" + + if album['Type'] == 'Other': + categories = "29" + logger.info("Album type is audiobook/spokenword. Searching all music categories") + + params = { "user": headphones.OMGWTFNZBS_UID, + "api": headphones.OMGWTFNZBS_APIKEY, + "catid": categories, + "retention": headphones.USENET_RETENTION, + "search": term + } + + searchURL = 'http://api.omgwtfnzbs.org/json/?' + urllib.urlencode(params) + + # Add a user-agent + request = urllib2.Request(searchURL) + request.add_header('User-Agent', USER_AGENT) + opener = urllib2.build_opener() + + logger.info(u'Parsing results from omgwtfnzbs' % searchURL) + + try: + data = opener.open(request).read() + except Exception, e: + logger.warn('Error fetching data from omgwtfnzbs: %s' % e) + data = False + + if data: + + d = json.loads(data) + + if 'notice' in data: + logger.info(u"No results returned from omgwtfnzbs: %s" % d['notice']) + pass + + else: + for item in d: + try: + url = item['getnzb'] + title = item['release'] + size = int(item['sizebytes']) + + resultlist.append((title, size, url, provider, 'nzb')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + + except Exception, e: + logger.error(u"An unknown error occurred trying to parse the results: %s" % e) + + # attempt to verify that this isn't a substring result + # when looking for "Foo - Foo" we don't want "Foobar" + # this should be less of an issue when it isn't a self-titled album so we'll only check vs artist + # + # Also will filter flac & remix albums if not specifically looking for it + # This code also checks the ignored words and required words + + if len(resultlist): + resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] + + if len(resultlist): + return resultlist + + +def send_to_downloader(data, bestqual, album): + + logger.info(u'Found best result from %s: %s - %s' % (bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1]))) + # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads + kind = bestqual[4] + + if kind == 'nzb': + folder_name = helpers.sab_sanitize_foldername(bestqual[0]) + + if headphones.NZB_DOWNLOADER == 1: + + nzb = classes.NZBDataSearchResult() + nzb.extraInfo.append(data) + nzb.name = nzb_folder_name + nzbget.sendNZB(nzb) + + elif headphones.NZB_DOWNLOADER == 0: + + nzb = classes.NZBDataSearchResult() + nzb.extraInfo.append(data) + nzb.name = nzb_folder_name + sab.sendNZB(nzb) + + # If we sent the file to sab, we can check how it was renamed and insert that into the snatched table + (replace_spaces, replace_dots) = sab.checkConfig() + + if replace_dots: + nzb_folder_name = helpers.sab_replace_dots(nzb_folder_name) + if replace_spaces: + nzb_folder_name = helpers.sab_replace_spaces(nzb_folder_name) + + else: + + nzb_name = nzb_folder_name + '.nzb' + download_path = os.path.join(headphones.BLACKHOLE_DIR, nzb_name) + try: + prev = os.umask(headphones.UMASK) + f = open(download_path, 'w') + f.write(data) + f.close() + os.umask(prev) + logger.info('File saved to: %s' % nzb_name) + except Exception, e: + logger.error('Couldn\'t write NZB file: %s' % e) + return + else: + folder_name = '%s - %s [%s]' % (helpers.latinToAscii(album['ArtistName']).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(album['AlbumTitle']).encode('UTF-8').replace('/', '_'), get_year_from_release_date(album['ReleaseDate'])) + + # Blackhole + if headphones.TORRENT_DOWNLOADER == 0: + + if bestqual[2].startswith("magnet:"): + logger.error("Cannot save magnet files to blackhole. Please switch your torrent downloader to Transmission or uTorrent") + return + + # Get torrent name from .torrent, this is usually used by the torrent client as the folder name + + torrent_name = folder_name + '.torrent' + download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name) + try: + if bestqual[3] == 'rutracker.org': + download_path = rutracker.get_torrent(bestqual[2], headphones.TORRENTBLACKHOLE_DIR) + if not download_path: + return + else: + #Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name. + prev = os.umask(headphones.UMASK) + torrent_file = open(download_path, 'wb') + torrent_file.write(data) + torrent_file.close() + os.umask(prev) + + #Open the fresh torrent file again so we can extract the proper torrent name + #Used later in post-processing. + torrent_file = open(download_path, 'rb') + torrent_info = bencode.bdecode(torrent_file.read()) + torrent_file.close() + torrent_folder_name = torrent_info['info'].get('name','').decode('utf-8') + logger.info('Torrent folder name: %s' % torrent_folder_name) + except Exception, e: + logger.error('Couldn\'t get name from Torrent file: %s' % e) + return + + elif headphones.TORRENT_DOWNLOADER == 1: + logger.info("Sending torrent to Transmission") + + # rutracker needs cookies to be set, pass the .torrent file instead of url + if bestqual[3] == 'rutracker.org': + file_or_url = rutracker.get_torrent(bestqual[2]) + else: + file_or_url = bestqual[2] + + torrentid = transmission.addTorrent(file_or_url) + + if not torrentid: + logger.error("Error sending torrent to Transmission. Are you sure it's running?") + return + + folder_name = transmission.getTorrentFolder(torrentid) + if folder_name: + logger.info('Torrent folder name: %s' % folder_name) + else: + logger.error('Torrent folder name could not be determined') + return + + # remove temp .torrent file created above + if bestqual[3] == 'rutracker.org': + try: + shutil.rmtree(os.path.split(file_or_url)[0]) + except Exception, e: + logger.warning('Couldn\'t remove temp dir %s' % e) + + myDB = db.DBConnection() + myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [album['AlbumID']]) + myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Snatched", folder_name, kind]) def verifyresult(title, artistterm, term, lossless): @@ -768,811 +861,633 @@ def getresultNZB(result): logger.warn('Error fetching nzb from url: ' + result[2] + ' %s' % e) return nzb -def preprocess(resultlist): +def searchTorrent(album, new=False, losslessOnly=False): - if not headphones.USENET_RETENTION: - usenet_retention = 2000 - else: - usenet_retention = int(headphones.USENET_RETENTION) - - for result in resultlist: - nzb = getresultNZB(result) - if nzb: - try: - d = minidom.parseString(nzb) - node = d.documentElement - nzbfiles = d.getElementsByTagName("file") - skipping = False - for nzbfile in nzbfiles: - if int(nzbfile.getAttribute("date")) < (time.time() - usenet_retention * 86400): - logger.info('NZB contains a file out of your retention. Skipping.') - skipping = True - break - if skipping: - continue - - #TODO: Do we want rar checking in here to try to keep unknowns out? - #or at least the option to do so? - except Exception, e: - logger.error('Unable to parse the best result NZB. Error: ' + str(e) + '. (Make sure your username/password/API is correct for provider: ' + result[3]) - continue - return nzb, result - else: - logger.error("Couldn't retrieve the best nzb. Skipping.") - return (False, False) - - - -def searchTorrent(albumid=None, new=False, losslessOnly=False): global gazelle # persistent what.cd api object to reduce number of login attempts - myDB = db.DBConnection() - - if albumid: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, SearchTerm from albums WHERE AlbumID=?', [albumid]) - else: - results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate, SearchTerm from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') - new = True - # rutracker login - if headphones.RUTRACKER and results: + if headphones.RUTRACKER and album: rulogin = rutracker.login(headphones.RUTRACKER_USER, headphones.RUTRACKER_PASSWORD) if not rulogin: logger.info(u'Could not login to rutracker, search results will exclude this provider') - for albums in results: + albumid = album['AlbumID'] + reldate = album['ReleaseDate'] - albumid = albums[2] - reldate = albums[3] + year = get_year_from_release_date(reldate) + + # MERGE THIS WITH THE TERM CLEANUP FROM searchNZB + dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':' ', '*':''} + + semi_cleanalbum = helpers.replace_all(album['AlbumTitle'], dic) + cleanalbum = helpers.latinToAscii(semi_cleanalbum) + semi_cleanartist = helpers.replace_all(album['ArtistName'], dic) + cleanartist = helpers.latinToAscii(semi_cleanartist) + + # Use provided term if available, otherwise build our own (this code needs to be cleaned up since a lot + # of these torrent providers are just using cleanartist/cleanalbum terms + if album['SearchTerm']: + term = album['SearchTerm'] + + else: + # FLAC usually doesn't have a year for some reason so I'll leave it out + # Various Artist albums might be listed as VA, so I'll leave that out too + # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums + if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4: + term = cleanartist + ' ' + cleanalbum + ' ' + year + elif album['ArtistName'] == 'Various Artists': + term = cleanalbum + ' ' + year + else: + term = cleanartist + ' ' + cleanalbum + + # Save user search term + if album['SearchTerm']: + usersearchterm = term + else: + usersearchterm = '' + + semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist).encode('utf-8', 'replace') + semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum).encode('utf-8', 'replace') + # Replace bad characters in the term and unicode it + term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') + artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8', 'replace') + albumterm = re.sub('[\.\-\/]', ' ', cleanalbum).encode('utf-8', 'replace') + + logger.info("Searching torrents for %s since it was marked as wanted" % term) + + resultlist = [] + pre_sorted_results = False + minimumseeders = int(headphones.NUMBEROFSEEDERS) - 1 + + if headphones.KAT: + provider = "Kick Ass Torrent" + providerurl = url_fix("http://kickass.to/usearch/" + term) + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "7" #music + format = "2" #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + categories = "7" #music + format = "10" #mp3+flac + maxsize = 10000000000 + else: + categories = "7" #music + format = "8" #mp3 + maxsize = 300000000 + + params = { + "categories[0]": "music", + "field": "seeders", + "sorder": "desc", + "rss": "1" + } + searchURL = providerurl + "/?%s" % urllib.urlencode(params) try: - year = reldate[:4] - except TypeError: - year = '' + data = urllib2.urlopen(searchURL, timeout=20) + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False - dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':' ', '*':''} + if data: - semi_cleanalbum = helpers.replace_all(albums[1], dic) - cleanalbum = helpers.latinToAscii(semi_cleanalbum) - semi_cleanartist = helpers.replace_all(albums[0], dic) - cleanartist = helpers.latinToAscii(semi_cleanartist) + logger.info(u'Parsing results from KAT' % searchURL) - # Use provided term if available, otherwise build our own (this code needs to be cleaned up since a lot - # of these torrent providers are just using cleanartist/cleanalbum terms - if albums[4]: - term = albums[4] + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass - else: - # FLAC usually doesn't have a year for some reason so I'll leave it out - # Various Artist albums might be listed as VA, so I'll leave that out too - # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums - if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4: - term = cleanartist + ' ' + cleanalbum + ' ' + year - elif albums[0] == 'Various Artists': - term = cleanalbum + ' ' + year else: - term = cleanartist + ' ' + cleanalbum - - # Save user search term - if albums[4]: - usersearchterm = term - else: - usersearchterm = '' - - semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist).encode('utf-8', 'replace') - semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum).encode('utf-8', 'replace') - # Replace bad characters in the term and unicode it - term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') - artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8', 'replace') - albumterm = re.sub('[\.\-\/]', ' ', cleanalbum).encode('utf-8', 'replace') - - logger.info("Searching torrents for %s since it was marked as wanted" % term) - - resultlist = [] - pre_sorted_results = False - minimumseeders = int(headphones.NUMBEROFSEEDERS) - 1 - - if headphones.KAT: - provider = "Kick Ass Torrent" - providerurl = url_fix("http://kickass.to/usearch/" + term) - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "7" #music - format = "2" #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - categories = "7" #music - format = "10" #mp3+flac - maxsize = 10000000000 - else: - categories = "7" #music - format = "8" #mp3 - maxsize = 300000000 - - params = { - "categories[0]": "music", - "field": "seeders", - "sorder": "desc", - "rss": "1" - } - searchURL = providerurl + "/?%s" % urllib.urlencode(params) - - try: - data = urllib2.urlopen(searchURL, timeout=20) - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False - - if data: - - logger.info(u'Parsing results from KAT' % searchURL) - - d = feedparser.parse(data) - - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in d.entries: + for item in d.entries: + try: + rightformat = True + title = item['title'] + seeders = item['torrent_seeds'] + url = item['links'][1]['href'] + size = int(item['links'][1]['length']) try: - rightformat = True - title = item['title'] - seeders = item['torrent_seeds'] - url = item['links'][1]['href'] - size = int(item['links'][1]['length']) - try: - if format == "2": - request = urllib2.Request(url) - request.add_header('Accept-encoding', 'gzip') - request.add_header('Referer', 'http://kat.ph/') - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO( response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() - else: - torrent = response.read() - if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: - rightformat = False - except Exception, e: - rightformat = False - if rightformat == True and size < maxsize and minimumseeders < int(seeders): - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeders), rightformat)) - + if format == "2": + request = urllib2.Request(url) + request.add_header('Accept-encoding', 'gzip') + request.add_header('Referer', 'http://kat.ph/') + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO( response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: + rightformat = False except Exception, e: - logger.error(u"An unknown error occurred in the KAT parser: %s" % e) + rightformat = False + if rightformat == True and size < maxsize and minimumseeders < int(seeders): + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeders), rightformat)) - if headphones.WAFFLES: - provider = "Waffles.fm" - providerurl = url_fix("https://www.waffles.fm/browse.php") + except Exception, e: + logger.error(u"An unknown error occurred in the KAT parser: %s" % e) + + if headphones.WAFFLES: + provider = "Waffles.fm" + providerurl = url_fix("https://www.waffles.fm/browse.php") + + bitrate = None + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + format = "FLAC" + bitrate = "(Lossless)" + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + format = "FLAC OR MP3" + maxsize = 10000000000 + else: + format = "MP3" + maxsize = 300000000 + + if not usersearchterm: + query_items = ['artist:"%s"' % artistterm, + 'album:"%s"' % albumterm, + 'year:(%s)' % year] + else: + query_items = [usersearchterm] + + query_items.extend(['format:(%s)' % format, + 'size:[0 TO %d]' % maxsize, + '-seeders:0']) # cut out dead torrents + + if bitrate: + query_items.append('bitrate:"%s"' % bitrate) + + params = { + "uid": headphones.WAFFLES_UID, + "passkey": headphones.WAFFLES_PASSKEY, + "rss": "1", + "c0": "1", + "s": "seeders", # sort by + "d": "desc" # direction + } + + searchURL = "%s?%s&q=%s" % (providerurl, urllib.urlencode(params), urllib.quote(" ".join(query_items))) + + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False + + if data: + + logger.info(u'Parsing results from Waffles.fm' % searchURL) + + d = feedparser.parse(data) + + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass + + else: + for item in d.entries: + + try: + title = item.title + desc_match = re.search(r"Size: (\d+)<", item.description) + size = int(desc_match.group(1)) + url = item.link + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + except Exception, e: + logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s" % e) + + # rutracker.org + + if headphones.RUTRACKER and rulogin: + + provider = "rutracker.org" + + # Ignore if release date not specified, results too unpredictable + + if not year and not usersearchterm: + logger.info(u'Release date not specified, ignoring for rutracker.org') + else: + + bitrate = False - bitrate = None if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - format = "FLAC" - bitrate = "(Lossless)" + format = 'lossless' maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - format = "FLAC OR MP3" + elif headphones.PREFERRED_QUALITY == 1: + format = 'lossless+mp3' maxsize = 10000000000 else: - format = "MP3" + format = 'mp3' maxsize = 300000000 + if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: + bitrate = True + + # build search url based on above if not usersearchterm: - query_items = ['artist:"%s"' % artistterm, - 'album:"%s"' % albumterm, - 'year:(%s)' % year] + searchURL = rutracker.searchurl(artistterm, albumterm, year, format) else: - query_items = [usersearchterm] + searchURL = rutracker.searchurl(usersearchterm, ' ', ' ', format) - query_items.extend(['format:(%s)' % format, - 'size:[0 TO %d]' % maxsize, - '-seeders:0']) # cut out dead torrents + logger.info(u'Parsing results from rutracker.org' % searchURL) + # parse results and get best match + + rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid, bitrate) + + # add best match to overall results list + + if rulist: + for ru in rulist: + title = ru[0].decode('utf-8') + size = ru[1] + url = ru[2] + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info(u"No valid results found from %s" % (provider)) + + if headphones.WHATCD: + provider = "What.cd" + providerurl = "http://what.cd/" + + bitrate = None + bitrate_string = bitrate + + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: # Lossless Only mode + search_formats = [gazelleformat.FLAC] + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY == 2: # Preferred quality mode + search_formats = [None] # should return all + bitrate = headphones.PREFERRED_BITRATE if bitrate: - query_items.append('bitrate:"%s"' % bitrate) - - params = { - "uid": headphones.WAFFLES_UID, - "passkey": headphones.WAFFLES_PASSKEY, - "rss": "1", - "c0": "1", - "s": "seeders", # sort by - "d": "desc" # direction - } - - searchURL = "%s?%s&q=%s" % (providerurl, urllib.urlencode(params), urllib.quote(" ".join(query_items))) + for encoding_string in gazelleencoding.ALL_ENCODINGS: + if re.search(bitrate, encoding_string, flags=re.I): + bitrate_string = encoding_string + if bitrate_string not in gazelleencoding.ALL_ENCODINGS: + raise Exception("Preferred bitrate %s not recognized by %s" % (bitrate_string, provider)) + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY == 1: # Highest quality including lossless + search_formats = [gazelleformat.FLAC, gazelleformat.MP3] + maxsize = 10000000000 + else: # Highest quality excluding lossless + search_formats = [gazelleformat.MP3] + maxsize = 300000000 + if not gazelle or not gazelle.logged_in(): try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False + logger.info(u"Attempting to log in to What.cd...") + gazelle = gazelleapi.GazelleAPI(headphones.WHATCD_USERNAME, headphones.WHATCD_PASSWORD) + gazelle._login() + except Exception, e: + gazelle = None + logger.error(u"What.cd credentials incorrect or site is down. Error: %s %s" % (e.__class__.__name__, str(e))) - if data: + if gazelle and gazelle.logged_in(): + logger.info(u"Searching %s..." % provider) + all_torrents = [] + for search_format in search_formats: + all_torrents.extend(gazelle.search_torrents(artistname=semi_clean_artist_term, + groupname=semi_clean_album_term, + format=search_format, encoding=bitrate_string)['results']) - logger.info(u'Parsing results from Waffles.fm' % searchURL) + # filter on format, size, and num seeders + logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...") + match_torrents = [ torrent for torrent in all_torrents if torrent.size <= maxsize ] + match_torrents = [ torrent for torrent in match_torrents if torrent.seeders >= minimumseeders ] - d = feedparser.parse(data) + logger.info(u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass + # sort by times d/l'd + if not len(match_torrents): + logger.info(u"No results found from %s for %s after filtering" % (provider, term)) + elif len(match_torrents) > 1: + logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % + (len(match_torrents), provider, artistterm, albumterm)) + logger.info("Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string) + match_torrents.sort(key=lambda x: int(x.snatched), reverse=True) + if gazelleformat.MP3 in search_formats: + # sort by size after rounding to nearest 10MB...hacky, but will favor highest quality + match_torrents.sort(key=lambda x: int(10 * round(x.size/1024./1024./10.)), reverse=True) + if search_formats and None not in search_formats: + match_torrents.sort(key=lambda x: int(search_formats.index(x.format))) # prefer lossless +# if bitrate: +# match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) +# match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) + logger.info(u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - else: - for item in d.entries: + pre_sorted_results = True + for torrent in match_torrents: + if not torrent.file_path: + torrent.group.update_group_data() # will load the file_path for the individual torrents + resultlist.append((torrent.file_path, + torrent.size, + gazelle.generate_torrent_link(torrent.id), + provider, + 'torrent')) - try: - title = item.title - desc_match = re.search(r"Size: (\d+)<", item.description) - size = int(desc_match.group(1)) - url = item.link - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - except Exception, e: - logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s" % e) - - # rutracker.org - - if headphones.RUTRACKER and rulogin: - - provider = "rutracker.org" - - # Ignore if release date not specified, results too unpredictable - - if not year and not usersearchterm: - logger.info(u'Release date not specified, ignoring for rutracker.org') - else: - - bitrate = False - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - format = 'lossless' - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY == 1: - format = 'lossless+mp3' - maxsize = 10000000000 - else: - format = 'mp3' - maxsize = 300000000 - if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: - bitrate = True - - # build search url based on above - - if not usersearchterm: - searchURL = rutracker.searchurl(artistterm, albumterm, year, format) - else: - searchURL = rutracker.searchurl(usersearchterm, ' ', ' ', format) - - logger.info(u'Parsing results from rutracker.org' % searchURL) - - # parse results and get best match - - rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid, bitrate) - - # add best match to overall results list - - if rulist: - for ru in rulist: - title = ru[0].decode('utf-8') - size = ru[1] - url = ru[2] - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info(u"No valid results found from %s" % (provider)) - - if headphones.WHATCD: - provider = "What.cd" - providerurl = "http://what.cd/" - - bitrate = None - bitrate_string = bitrate - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: # Lossless Only mode - search_formats = [gazelleformat.FLAC] - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY == 2: # Preferred quality mode - search_formats = [None] # should return all - bitrate = headphones.PREFERRED_BITRATE - if bitrate: - for encoding_string in gazelleencoding.ALL_ENCODINGS: - if re.search(bitrate, encoding_string, flags=re.I): - bitrate_string = encoding_string - if bitrate_string not in gazelleencoding.ALL_ENCODINGS: - raise Exception("Preferred bitrate %s not recognized by %s" % (bitrate_string, provider)) - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY == 1: # Highest quality including lossless - search_formats = [gazelleformat.FLAC, gazelleformat.MP3] - maxsize = 10000000000 - else: # Highest quality excluding lossless - search_formats = [gazelleformat.MP3] - maxsize = 300000000 - - if not gazelle or not gazelle.logged_in(): - try: - logger.info(u"Attempting to log in to What.cd...") - gazelle = gazelleapi.GazelleAPI(headphones.WHATCD_USERNAME, headphones.WHATCD_PASSWORD) - gazelle._login() - except Exception, e: - gazelle = None - logger.error(u"What.cd credentials incorrect or site is down. Error: %s %s" % (e.__class__.__name__, str(e))) - - if gazelle and gazelle.logged_in(): - logger.info(u"Searching %s..." % provider) - all_torrents = [] - for search_format in search_formats: - all_torrents.extend(gazelle.search_torrents(artistname=semi_clean_artist_term, - groupname=semi_clean_album_term, - format=search_format, encoding=bitrate_string)['results']) - - # filter on format, size, and num seeders - logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...") - match_torrents = [ torrent for torrent in all_torrents if torrent.size <= maxsize ] - match_torrents = [ torrent for torrent in match_torrents if torrent.seeders >= minimumseeders ] - - logger.info(u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - - # sort by times d/l'd - if not len(match_torrents): - logger.info(u"No results found from %s for %s after filtering" % (provider, term)) - elif len(match_torrents) > 1: - logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % - (len(match_torrents), provider, artistterm, albumterm)) - logger.info("Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string) - match_torrents.sort(key=lambda x: int(x.snatched), reverse=True) - if gazelleformat.MP3 in search_formats: - # sort by size after rounding to nearest 10MB...hacky, but will favor highest quality - match_torrents.sort(key=lambda x: int(10 * round(x.size/1024./1024./10.)), reverse=True) - if search_formats and None not in search_formats: - match_torrents.sort(key=lambda x: int(search_formats.index(x.format))) # prefer lossless - # if bitrate: - # match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) - # match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) - logger.info(u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) - - pre_sorted_results = True - for torrent in match_torrents: - if not torrent.file_path: - torrent.group.update_group_data() # will load the file_path for the individual torrents - resultlist.append((torrent.file_path, - torrent.size, - gazelle.generate_torrent_link(torrent.id), - provider)) - - # Pirate Bay - if headphones.PIRATEBAY: - provider = "The Pirate Bay" - if headphones.PIRATEBAY_PROXY_URL: - #Might need to clean up the user submitted url - pirate_proxy = headphones.PIRATEBAY_PROXY_URL - - if not pirate_proxy.startswith('http'): - pirate_proxy = 'http://' + pirate_proxy - if pirate_proxy.endswith('/'): - pirate_proxy = pirate_proxy[:-1] - - providerurl = url_fix(pirate_proxy + "/search/" + term + "/0/99/") - - else: - providerurl = url_fix("http://thepiratebay.se/search/" + term + "/0/99/") - - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - category = '104' #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - category = '100' #audio cat - maxsize = 10000000000 - else: - category = '101' #mp3 - maxsize = 300000000 - - searchURL = providerurl + category + # Pirate Bay + if headphones.PIRATEBAY: + provider = "The Pirate Bay" + if headphones.PIRATEBAY_PROXY_URL: + #Might need to clean up the user submitted url + pirate_proxy = headphones.PIRATEBAY_PROXY_URL - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from The Pirate Bay: %s' % e) - data = False + if not pirate_proxy.startswith('http'): + pirate_proxy = 'http://' + pirate_proxy + if pirate_proxy.endswith('/'): + pirate_proxy = pirate_proxy[:-1] + + providerurl = url_fix(pirate_proxy + "/search/" + term + "/0/99/") - if data: + else: + providerurl = url_fix("http://thepiratebay.se/search/" + term + "/0/99/") - logger.info(u'Parsing results from The Pirate Bay' % searchURL) - - soup = BeautifulSoup(data) - table = soup.find('table') - rows = None - if table: - rows = table.findAll('tr') - - if not rows or len(rows) == '1': - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in rows[1:]: - try: - rightformat = True - title = ''.join(item.find("a", {"class" : "detLink"})) - seeds = int(''.join(item.find("td", {"align" : "right"}))) - url = item.findAll("a")[3]['href'] - if headphones.TORRENT_DOWNLOADER == 0: - tor_hash = re.findall("urn:btih:(.*?)&", url) - if len(tor_hash) > 0: - url = "http://torrage.com/torrent/"+str(tor_hash[0]).upper()+".torrent" - else: - url = None - formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace(u'\xa0', ' ') - size = helpers.piratesize(formatted_size) - if size < maxsize and minimumseeders < seeds and url != None: - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, formatted_size)) - else: - logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds))) - - except Exception, e: - logger.error(u"An unknown error occurred in the Pirate Bay parser: %s" % e) + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + category = '104' #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + category = '100' #audio cat + maxsize = 10000000000 + else: + category = '101' #mp3 + maxsize = 300000000 - if headphones.ISOHUNT: - provider = "isoHunt" - providerurl = url_fix("http://isohunt.com/js/rss/" + term) - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "7" #music - format = "2" #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - categories = "7" #music - format = "10" #mp3+flac - maxsize = 10000000000 - else: - categories = "7" #music - format = "8" #mp3 - maxsize = 300000000 - - params = { - "iht": "2", - "sort": "seeds" - } - searchURL = providerurl + "?%s" % urllib.urlencode(params) - - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False - - if data: - - logger.info(u'Parsing results from isoHunt' % searchURL) - - d = feedparser.parse(data) - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in d.entries: - try: - rightformat = True - title = re.sub(r"(?<= \[)(.+)(?=\])","",item.title) - title = title.replace("[]","") - sxstart = item.description.find("Seeds: ") + 7 - seeds = "" - while item.description[sxstart:sxstart + 1] != " ": - seeds = seeds + item.description[sxstart:sxstart + 1] - sxstart = sxstart + 1 - url = item.links[1]['url'] - size = int(item.links[1]['length']) - try: - if format == "2": - request = urllib2.Request(url) - request.add_header('Accept-encoding', 'gzip') - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO( response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() - else: - torrent = response.read() - if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: - rightformat = False - except Exception, e: - rightformat = False - for findterm in term.split(" "): - if not findterm in title: - rightformat = False - if rightformat == True and size < maxsize and minimumseeders < seeds: - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) - - except Exception, e: - logger.error(u"An unknown error occurred in the isoHunt parser: %s" % e) - - if headphones.MININOVA: - provider = "Mininova" - providerurl = url_fix("http://www.mininova.org/rss/" + term + "/5") - if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "7" #music - format = "2" #flac - maxsize = 10000000000 - elif headphones.PREFERRED_QUALITY: - categories = "7" #music - format = "10" #mp3+flac - maxsize = 10000000000 - else: - categories = "7" #music - format = "8" #mp3 - maxsize = 300000000 - - searchURL = providerurl - - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False - - if data: - - logger.info(u'Parsing results from Mininova' % searchURL) - - d = feedparser.parse(data) - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass - - else: - for item in d.entries: - try: - rightformat = True - title = item.title - sxstart = item.description.find("Ratio: ") + 7 - seeds = "" - while item.description[sxstart:sxstart + 1] != " ": - seeds = seeds + item.description[sxstart:sxstart + 1] - sxstart = sxstart + 1 - url = item.links[1]['url'] - size = int(item.links[1]['length']) - try: - if format == "2": - request = urllib2.Request(url) - request.add_header('Accept-encoding', 'gzip') - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO( response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() - else: - torrent = response.read() - if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: - rightformat = False - except Exception, e: - rightformat = False - if rightformat == True and size < maxsize and minimumseeders < seeds: - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) - - except Exception, e: - logger.error(u"An unknown error occurred in the Mininova Parser: %s" % e) - - - - #attempt to verify that this isn't a substring result - #when looking for "Foo - Foo" we don't want "Foobar" - #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist - if len(resultlist): - resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] - - if len(resultlist): - - # Add a priority if it has any of the preferred words - temp_list = [] - for result in resultlist: - if headphones.PREFERRED_WORDS and any(word.lower() in result[0].lower() for word in helpers.split_string(headphones.PREFERRED_WORDS)): - temp_list.append((result[0],result[1],result[2],result[3],1)) - else: - temp_list.append((result[0],result[1],result[2],result[3],0)) - - resultlist = temp_list - - if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE: - - logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE) - - tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid]) - - try: - albumlength = sum([pair[0] for pair in tracks]) - - targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128 - - if not targetsize: - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - torrentlist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - else: - logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) - newlist = [] - flac_list = [] - - if headphones.PREFERRED_BITRATE_HIGH_BUFFER: - high_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_HIGH_BUFFER)/100 - else: - high_size_limit = None - if headphones.PREFERRED_BITRATE_LOW_BUFFER: - low_size_limit = targetsize * int(headphones.PREFERRED_BITRATE_LOW_BUFFER)/100 - else: - low_size_limit = None - - for result in resultlist: - - if high_size_limit and (int(result[1]) > high_size_limit): - logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")") - - # Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches - if 'flac' in result[0].lower(): - flac_list.append((result[0], result[1], result[2], result[3], result[4])) - - continue - - if low_size_limit and (int(result[1]) < low_size_limit): - logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")") - continue - - delta = abs(targetsize - int(result[1])) - newlist.append((result[0], result[1], result[2], result[3], result[4], delta)) - - torrentlist = sorted(newlist, key=lambda title: (-title[4], title[5])) - - if not len(torrentlist) and len(flac_list) and headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS: - logger.info("Since there were no appropriate lossy matches (and at least one lossless match), going to use lossless instead") - torrentlist = sorted(flac_list, key=lambda title: (title[4], int(title[1])), reverse=True) - - except Exception, e: - - logger.debug('Error: %s' % str(e)) - logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1])) - - torrentlist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - else: - - torrentlist = sorted(resultlist, key=lambda title: (title[4], int(title[1])), reverse=True) - - if new: - - while True: - - if len(torrentlist): - - alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [torrentlist[0][2]]) - - if len(alreadydownloaded): - logger.info('%s has already been downloaded. Skipping.' % torrentlist[0][0]) - torrentlist.pop(0) - - else: - break - else: - logger.info('No more results found for %s' % term) - return - - logger.info(u"Pre-processing result") - - (data, bestqual) = preprocesstorrent(torrentlist, pre_sorted_results) - - if data and bestqual: - logger.info(u'Found best result from %s: %s - %s' % (bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1]))) - - torrent_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year) - - # Blackhole - if headphones.TORRENT_DOWNLOADER == 0: - - if bestqual[2].startswith("magnet:"): - logger.error("Cannot save magnet files to blackhole. Please switch your torrent downloader to Transmission or uTorrent") - return - - # Get torrent name from .torrent, this is usually used by the torrent client as the folder name - - torrent_name = torrent_folder_name + '.torrent' - download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name) - try: - if bestqual[3] == 'rutracker.org': - download_path = rutracker.get_torrent(bestqual[2], headphones.TORRENTBLACKHOLE_DIR) - if not download_path: - break - else: - #Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name. - prev = os.umask(headphones.UMASK) - torrent_file = open(download_path, 'wb') - torrent_file.write(data) - torrent_file.close() - os.umask(prev) - - #Open the fresh torrent file again so we can extract the proper torrent name - #Used later in post-processing. - torrent_file = open(download_path, 'rb') - torrent_info = bencode.bdecode(torrent_file.read()) - torrent_file.close() - torrent_folder_name = torrent_info['info'].get('name','').decode('utf-8') - logger.info('Torrent folder name: %s' % torrent_folder_name) - except Exception, e: - logger.error('Couldn\'t get name from Torrent file: %s' % e) - break - - elif headphones.TORRENT_DOWNLOADER == 1: - logger.info("Sending torrent to Transmission") - - # rutracker needs cookies to be set, pass the .torrent file instead of url - if bestqual[3] == 'rutracker.org': - file_or_url = rutracker.get_torrent(bestqual[2]) - else: - file_or_url = bestqual[2] - - torrentid = transmission.addTorrent(file_or_url) - - if not torrentid: - logger.error("Error sending torrent to Transmission. Are you sure it's running?") - return - - torrent_folder_name = transmission.getTorrentFolder(torrentid) - if torrent_folder_name: - logger.info('Torrent folder name: %s' % torrent_folder_name) - else: - logger.error('Torrent folder name could not be determined') - return - - # remove temp .torrent file created above - if bestqual[3] == 'rutracker.org': - try: - shutil.rmtree(os.path.split(file_or_url)[0]) - except Exception, e: - logger.warning('Couldn\'t remove temp dir %s' % e) - - myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]]) - myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name, "torrent"]) - -def preprocesstorrent(resultlist, pre_sorted_list=False): - - # Get out of here if we're using Transmission or uTorrent - if headphones.TORRENT_DOWNLOADER != 0: - return True, resultlist[0] + searchURL = providerurl + category - for result in resultlist: + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from The Pirate Bay: %s' % e) + data = False + + if data: + + logger.info(u'Parsing results from The Pirate Bay' % searchURL) + + soup = BeautifulSoup(data) + table = soup.find('table') + rows = None + if table: + rows = table.findAll('tr') + + if not rows or len(rows) == '1': + logger.info(u"No results found from %s for %s" % (provider, term)) + pass + + else: + for item in rows[1:]: + try: + rightformat = True + title = ''.join(item.find("a", {"class" : "detLink"})) + seeds = int(''.join(item.find("td", {"align" : "right"}))) + url = item.findAll("a")[3]['href'] + if headphones.TORRENT_DOWNLOADER == 0: + tor_hash = re.findall("urn:btih:(.*?)&", url) + if len(tor_hash) > 0: + url = "http://torrage.com/torrent/"+str(tor_hash[0]).upper()+".torrent" + else: + url = None + formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace(u'\xa0', ' ') + size = helpers.piratesize(formatted_size) + if size < maxsize and minimumseeders < seeds and url != None: + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, formatted_size)) + else: + logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds))) + + except Exception, e: + logger.error(u"An unknown error occurred in the Pirate Bay parser: %s" % e) - # get outta here if rutracker or piratebay - if result[3] == 'rutracker.org': - return True, result + if headphones.ISOHUNT: + provider = "isoHunt" + providerurl = url_fix("http://isohunt.com/js/rss/" + term) + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "7" #music + format = "2" #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + categories = "7" #music + format = "10" #mp3+flac + maxsize = 10000000000 + else: + categories = "7" #music + format = "8" #mp3 + maxsize = 300000000 + + params = { + "iht": "2", + "sort": "seeds" + } + searchURL = providerurl + "?%s" % urllib.urlencode(params) try: - request = urllib2.Request(result[2]) - request.add_header('Accept-encoding', 'gzip') - - if result[3] == 'Kick Ass Torrent': - request.add_header('Referer', 'http://kat.ph/') + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False - if result[3] == 'What.cd': - request.add_header('User-Agent', 'Headphones') + if data: + + logger.info(u'Parsing results from isoHunt' % searchURL) + + d = feedparser.parse(data) + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass - response = urllib2.urlopen(request) - if response.info().get('Content-Encoding') == 'gzip': - buf = StringIO(response.read()) - f = gzip.GzipFile(fileobj=buf) - torrent = f.read() else: - torrent = response.read() - except ExpatError: - logger.error('Unable to torrent %s' % result[2]) - continue + for item in d.entries: + try: + rightformat = True + title = re.sub(r"(?<= \[)(.+)(?=\])","",item.title) + title = title.replace("[]","") + sxstart = item.description.find("Seeds: ") + 7 + seeds = "" + while item.description[sxstart:sxstart + 1] != " ": + seeds = seeds + item.description[sxstart:sxstart + 1] + sxstart = sxstart + 1 + url = item.links[1]['url'] + size = int(item.links[1]['length']) + try: + if format == "2": + request = urllib2.Request(url) + request.add_header('Accept-encoding', 'gzip') + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO( response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: + rightformat = False + except Exception, e: + rightformat = False + for findterm in term.split(" "): + if not findterm in title: + rightformat = False + if rightformat == True and size < maxsize and minimumseeders < seeds: + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) + + except Exception, e: + logger.error(u"An unknown error occurred in the isoHunt parser: %s" % e) + + if headphones.MININOVA: + provider = "Mininova" + providerurl = url_fix("http://www.mininova.org/rss/" + term + "/5") + if headphones.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "7" #music + format = "2" #flac + maxsize = 10000000000 + elif headphones.PREFERRED_QUALITY: + categories = "7" #music + format = "10" #mp3+flac + maxsize = 10000000000 + else: + categories = "7" #music + format = "8" #mp3 + maxsize = 300000000 + + searchURL = providerurl + + try: + data = urllib2.urlopen(searchURL, timeout=20).read() + except urllib2.URLError, e: + logger.warn('Error fetching data from %s: %s' % (provider, e)) + data = False + + if data: + + logger.info(u'Parsing results from Mininova' % searchURL) + + d = feedparser.parse(data) + if not len(d.entries): + logger.info(u"No results found from %s for %s" % (provider, term)) + pass + + else: + for item in d.entries: + try: + rightformat = True + title = item.title + sxstart = item.description.find("Ratio: ") + 7 + seeds = "" + while item.description[sxstart:sxstart + 1] != " ": + seeds = seeds + item.description[sxstart:sxstart + 1] + sxstart = sxstart + 1 + url = item.links[1]['url'] + size = int(item.links[1]['length']) + try: + if format == "2": + request = urllib2.Request(url) + request.add_header('Accept-encoding', 'gzip') + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO( response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + if int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1: + rightformat = False + except Exception, e: + rightformat = False + if rightformat == True and size < maxsize and minimumseeders < seeds: + resultlist.append((title, size, url, provider, 'torrent')) + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) + + except Exception, e: + logger.error(u"An unknown error occurred in the Mininova Parser: %s" % e) + + #attempt to verify that this isn't a substring result + #when looking for "Foo - Foo" we don't want "Foobar" + #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist + if len(resultlist): + resultlist[:] = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] + if len(resultlist): + return resultlist + + +# THIS IS KIND OF A MESS AND PROBABLY NEEDS TO BE CLEANED UP +def preprocess(resultlist): + + for result in resultlist: + + if result[4] == 'torrent': + #Get out of here if we're using Transmission or uTorrent + if headphones.TORRENT_DOWNLOADER != 0: + return True, result + # get outta here if rutracker or piratebay + if result[3] == 'rutracker.org': + return True, result + + try: + request = urllib2.Request(result[2]) + request.add_header('Accept-encoding', 'gzip') + if result[3] == 'Kick Ass Torrent': + request.add_header('Referer', 'http://kat.ph/') + + if result[3] == 'What.cd': + request.add_header('User-Agent', 'Headphones') + + response = urllib2.urlopen(request) + if response.info().get('Content-Encoding') == 'gzip': + buf = StringIO(response.read()) + f = gzip.GzipFile(fileobj=buf) + torrent = f.read() + else: + torrent = response.read() + except ExpatError: + logger.error('Unable to torrent %s' % result[2]) + continue + + return torrent, result + + else: + if not headphones.USENET_RETENTION: + usenet_retention = 2000 + else: + usenet_retention = int(headphones.USENET_RETENTION) + + nzb = getresultNZB(result) + if nzb: + try: + d = minidom.parseString(nzb) + node = d.documentElement + nzbfiles = d.getElementsByTagName("file") + skipping = False + for nzbfile in nzbfiles: + if int(nzbfile.getAttribute("date")) < (time.time() - usenet_retention * 86400): + logger.info('NZB contains a file out of your retention. Skipping.') + skipping = True + break + if skipping: + continue + + #TODO: Do we want rar checking in here to try to keep unknowns out? + #or at least the option to do so? + except Exception, e: + logger.error('Unable to parse the best result NZB. Error: ' + str(e) + '. (Make sure your username/password/API is correct for provider: ' + result[3]) + continue + + return nzb, result + else: + logger.error("Couldn't retrieve the best nzb. Skipping.") + continue + + return (None, None) + - return torrent, result From 7761015b1536f6c670c08108c32fcc52a4d5bd37 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Thu, 3 Apr 2014 19:45:13 -0700 Subject: [PATCH 6/6] Couple of bug fixes --- headphones/searcher.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index cd03cf48..d99a8d81 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -163,15 +163,28 @@ def do_sorted_search(album, new, losslessOnly): else: + nzb_results = None + torrent_results = None + if NZB_PROVIDERS and NZB_DOWNLOADERS: nzb_results = searchNZB(album, new, losslessOnly) if TORRENT_PROVIDERS: torrent_results = searchTorrent(album, new, losslessOnly) + if not nzb_results: + nzb_results = [] + + if not torrent_results: + torrent_results = [] + results = nzb_results + torrent_results sorted_search_results = sort_search_results(results, album, new) + + if not sorted_search_results: + return + logger.info(u"Making sure we can download the best result") (data, bestqual) = preprocess(sorted_search_results) @@ -272,11 +285,11 @@ def sort_search_results(resultlist, album, new): else: break else: - logger.info('No more results found for %s' % term) + logger.info('No more results found for: %s - %s' % (album['ArtistName'], album['AlbumTitle'])) return None if not len(finallist): - logger.info('No appropriate matches found for %s' % term) + logger.info('No appropriate matches found for %s - %s' % (album['ArtistName'], album['AlbumTitle'])) return None return finallist