diff --git a/headphones/request.py b/headphones/request.py index 6cc0f7f2..8f2c4359 100644 --- a/headphones/request.py +++ b/headphones/request.py @@ -170,13 +170,22 @@ def server_message(response): message = None # First attempt is to 'read' the response as HTML - if response.headers.get("content-type") == "text/html": + if "text/html" in response.headers.get("content-type"): try: soup = BeautifulSoup(response.content, "html5lib") except Exception: pass + # Find body and cleanup common tags to grab content, which probably + # contains the message. message = soup.find("body") + + for element in ["header", "script", "footer", "nav", "input", + "textarea"]: + + for tag in soup.find_all(element): + tag.replaceWith("") + message = message.text if message else soup.text message = message.strip() diff --git a/headphones/searcher.py b/headphones/searcher.py index a40342a6..72c71608 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -307,9 +307,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None): # Use the provided search term if available, otherwise build a search term if album['SearchTerm']: term = album['SearchTerm'] - else: - # FLAC usually doesn't have a year for some reason so I'll leave it out + # FLAC usually doesn't have a year for some reason so leave it out. # Various Artist albums might be listed as VA, so I'll leave that out too # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4: @@ -344,8 +343,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None): categories = "3010" if album['Type'] == 'Other': - categories = "3030" logger.info("Album type is audiobook/spokenword. Using audiobook category") + categories = "3030" # Request results logger.info('Parsing results from Headphones Indexer') @@ -483,8 +482,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None): data = request.request_feed( url='http://beta.nzbs.org/api', - params=params, headers=headers, - timeout=20 + params=params, headers=headers ) # Process feed @@ -660,8 +658,8 @@ def send_to_downloader(data, bestqual, album): except: logger.error("Could not change permissions for file: %s", download_path) - #Open the fresh torrent file again so we can extract the proper torrent name - #Used later in post-processing. + # Open the fresh torrent file again so we can extract the + # proper torrent name Used later in post-processing. with open(download_path, 'rb') as fp: torrent_info = bdecode(fp.read()) @@ -933,28 +931,33 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None): if headphones.KAT: provider = "Kick Ass Torrents" + ka_term = term.replace("!", "") + # Use proxy if specified if headphones.KAT_PROXY_URL: - proxy_url = set_proxy(headphones.KAT_PROXY_URL) - providerurl = url_fix(proxy_url + "/usearch/" + term) + providerurl = url_fix(set_proxy(proxy_url)) else: - providerurl = url_fix("http://kickass.to/usearch/" + term) + providerurl = url_fix("https://kickass.to") + # Build URL + providerurl = providerurl + "/usearch/" + ka_term + + # Pick category for torrents if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - categories = "7" #music - format = "2" #flac + categories = "7" # Music + format = "2" # FLAC maxsize = 10000000000 elif headphones.PREFERRED_QUALITY == 1 or allow_lossless: - categories = "7" #music - format = "10" #mp3+flac + categories = "7" # Music + format = "10" # MP3 and FLAC maxsize = 10000000000 else: - categories = "7" #music - format = "8" #mp3 + categories = "7" # Music + format = "8" # MP3 only maxsize = 300000000 # Requesting content - logger.info('Parsing results from KAT') + logger.info("Searching KAT using term: %s", ka_term) params = { "categories[0]": "music", @@ -962,17 +965,12 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None): "sorder": "desc", "rss": "1" } - - data = request.request_feed( - url=providerurl, - params=params, - timeout=20 - ) + data = request.request_feed(url=providerurl, params=params) # Process feed if data: if not len(data.entries): - logger.info(u"No results found from %s for %s", provider, term) + logger.info("No results found") else: for item in data.entries: try: @@ -981,16 +979,17 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None): seeders = item['torrent_seeds'] url = item['links'][1]['href'] size = int(item['links'][1]['length']) + if format == "2": torrent = request.request_content(url) if not torrent or (int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1): rightformat = False + if rightformat == True and size < maxsize and minimumseeders < int(seeders): resultlist.append((title, size, url, provider, 'torrent')) logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) else: logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat) - except Exception as e: logger.exception("Unhandled exception in the KAT parser") @@ -1183,51 +1182,48 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None): # Pirate Bay if headphones.PIRATEBAY: provider = "The Pirate Bay" - if headphones.PIRATEBAY_PROXY_URL: - proxy_url = set_proxy(headphones.PIRATEBAY_PROXY_URL) - providerurl = url_fix(proxy_url + "/search/" + term + "/0/99/") - else: - providerurl = url_fix("http://thepiratebay.se/search/" + term + "/0/99/") + tpb_term = term.replace("!", "") + # Use proxy if specified + if headphones.PIRATEBAY_PROXY_URL: + providerurl = url_fix(set_proxy(headphones.PIRATEBAY_PROXY_URL)) + else: + providerurl = url_fix("https://thepiratebay.se") + + # Build URL + providerurl = providerurl + "/search/" + tpb_term + "/0/7/" # 7 is sort by seeders + + # Pick category for torrents if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - category = '104' #flac + category = '104' # FLAC maxsize = 10000000000 elif headphones.PREFERRED_QUALITY == 1 or allow_lossless: - category = '100' #audio cat + category = '100' # General audio category maxsize = 10000000000 else: - category = '101' #mp3 + category = '101' # MP3 only maxsize = 300000000 - # Requesting content - logger.info('Parsing results from The Pirate Bay') + # Request content + logger.info("Searching The Pirate Bay using term: %s", tpb_term) headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36'} - params = { - "iht": "2", - "sort": "seeds" - } - - data = request.request_soup( - url=providerurl + category, - params=params, - headers=headers, - timeout=20 - ) + data = request.request_soup(url=providerurl + category, headers=headers) # Process content if data: - rows = data.select('table tr') + rows = data.select('table tbody tr') - if not rows or len(rows) == '1': - logger.info(u"No results found from %s for %s" % (provider, term)) + if not rows: + logger.info("No results found") else: - for item in rows[1:]: + for item in rows: try: + url = None rightformat = True title = ''.join(item.find("a", {"class" : "detLink"})) seeds = int(''.join(item.find("td", {"align" : "right"}))) - url = None + if headphones.TORRENT_DOWNLOADER == 0: try: url = item.find("a", {"title":"Download this torrent"})['href'] @@ -1238,17 +1234,19 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None): logger.info('"%s" only has a magnet link, skipping' % title) continue else: - url = item.findAll("a")[3]['href'] + url = item.findAll("a")[3]["href"] + if url.lower().startswith("//"): url = "http:" + url + formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace(u'\xa0', ' ') size = helpers.piratesize(formatted_size) - if size < maxsize and minimumseeders < seeds and url != None: - resultlist.append((title, size, url, provider, 'torrent')) + + if size < maxsize and minimumseeders < seeds and url is not None: + resultlist.append((title, size, url, provider, "torrent")) logger.info('Found %s. Size: %s' % (title, formatted_size)) else: logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds))) - except Exception as e: logger.error(u"An unknown error occurred in the Pirate Bay parser: %s" % e)