Files
headphones/headphones/searcher.py
T

1575 lines
66 KiB
Python

# This file is part of Headphones.
#
# Headphones is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Headphones is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
# NZBGet support added by CurlyMo <curlymoo1@gmail.com> as a part of XBian - XBMC on the Raspberry Pi
import urllib
import urlparse
from pygazelle import api as gazelleapi
from pygazelle import encoding as gazelleencoding
from pygazelle import format as gazelleformat
from base64 import b16encode, b32decode
from hashlib import sha1
import os
import re
import string
import shutil
import random
import urllib
import headphones
import subprocess
import unicodedata
from headphones.common import USER_AGENT
from headphones import logger, db, helpers, classes, sab, nzbget, request
from headphones import utorrent, transmission, notifiers
from bencode import bencode, bdecode
import headphones.searcher_rutracker as rutrackersearch
# Magnet to torrent services, for Black hole. Stolen from CouchPotato.
# Each entry is a URL template that is formatted with the torrent hash and
# then fetched to obtain the contents of the corresponding .torrent file.
TORRENT_TO_MAGNET_SERVICES = [
    'https://zoink.it/torrent/%s.torrent',
    'http://torrage.com/torrent/%s.torrent',
    'https://torcache.net/torrent/%s.torrent',
]
# Persistent What.cd API object
gazelle = None
# RUtracker search object; note that it is instantiated when this module is
# imported.
rutracker = rutrackersearch.Rutracker()
def fix_url(s, charset="utf-8"):
    """
    Return `s` as a properly quoted URL.

    Unicode input is first encoded with `charset` (characters that cannot be
    encoded are dropped). The path and the query string are then quoted,
    leaving the characters that structure them untouched.
    """
    raw = s.encode(charset, 'ignore') if isinstance(s, unicode) else s
    parts = urlparse.urlsplit(raw)

    # Keep '/' and '%' in the path, and ':', '&' and '=' in the query string.
    quoted_path = urllib.quote(parts.path, '/%')
    quoted_query = urllib.quote_plus(parts.query, ':&=')

    return urlparse.urlunsplit(
        (parts.scheme, parts.netloc, quoted_path, quoted_query,
         parts.fragment))
def torrent_to_file(target_file, data):
    """
    Write torrent data to file, and change permissions accordingly.

    `target_file` is the path to write to and `data` the raw torrent
    contents. Returns True when the data was written and None in case of a
    write error. If changing permissions fails, a warning is logged and the
    function continues anyway.
    """

    # Write data to file
    try:
        with open(target_file, "wb") as fp:
            fp.write(data)
    except IOError as e:
        # Log the exception itself: `e.message` is empty for errors raised
        # by the operating system, which hides the actual reason.
        logger.error("Could not write torrent file '%s': %s. Skipping.",
                     target_file, e)
        return

    # Try to change permissions. FILE_PERMISSIONS is parsed as an octal
    # string.
    try:
        os.chmod(target_file, int(headphones.CONFIG.FILE_PERMISSIONS, 8))
    except OSError as e:
        logger.warn("Could not change permissions for file '%s': %s. "
                    "Continuing.", target_file, e)

    # Done
    return True
def read_torrent_name(torrent_file, default_name=None):
    """
    Read the torrent file and return the torrent name.

    If the decoded torrent is empty or does not contain a name, the
    `default_name` is returned instead. If the file cannot be opened, an
    error is logged and None is returned.
    """

    # Open file
    try:
        with open(torrent_file, "rb") as fp:
            torrent_info = bdecode(fp.read())
    except IOError:
        logger.error("Unable to open torrent file: %s", torrent_file)
        return

    # Read dictionary
    if torrent_info:
        try:
            return torrent_info["info"]["name"]
        except KeyError as e:
            # The exception has to be bound here, otherwise logging the
            # missing key fails with a NameError.
            if default_name:
                logger.warning("Couldn't get name from torrent file: %s. "
                               "Defaulting to '%s'", e, default_name)
            else:
                logger.warning("Couldn't get name from torrent file: %s. No "
                               "default given", e)

    # Return default
    return default_name
def calculate_torrent_hash(link, data=None):
    """
    Calculate the torrent hash from a magnet link or data.

    For a magnet link, the hash is taken from its `urn:btih:` part; a
    32-character hash is treated as base32 and converted to hexadecimal. For
    any other link, the hash is the SHA-1 of the bencoded `info` dictionary
    found in `data`. The hash is returned in upper case.

    Raises a ValueError when it cannot create a torrent hash given the input
    data.
    """

    if link.startswith("magnet:"):
        match = re.search(r"urn:btih:([\w]{32,40})", link)

        if not match:
            raise ValueError("Cannot find torrent hash in magnet link")

        torrent_hash = match.group(1)

        if len(torrent_hash) == 32:
            # casefold=True accepts lower case base32 hashes as well.
            try:
                torrent_hash = b16encode(b32decode(torrent_hash,
                                                   casefold=True))
            except (TypeError, ValueError) as e:
                raise ValueError("Invalid base32 torrent hash in magnet "
                                 "link: %s" % e)

            # Make sure a native string is returned on every interpreter.
            if not isinstance(torrent_hash, str):
                torrent_hash = torrent_hash.decode("ascii")
    elif data:
        info = bdecode(data)["info"]
        torrent_hash = sha1(bencode(info)).hexdigest()
    else:
        raise ValueError("Cannot calculate torrent hash without magnet link "
                         "or data")

    return torrent_hash.upper()
def get_seed_ratio(provider):
    """
    Return the seed ratio for the specified provider, if applicable.

    The ratio is read from the configuration option that belongs to the
    provider and converted to a float. Returns None for providers without a
    seed ratio option, when the option is None, and when the configured
    value cannot be converted (a warning is logged in that case).
    """

    # Provider name -> name of the configuration option with its ratio.
    ratio_options = {
        'rutracker.org': 'RUTRACKER_RATIO',
        'Kick Ass Torrents': 'KAT_RATIO',
        'What.cd': 'WHATCD_RATIO',
        'The Pirate Bay': 'PIRATEBAY_RATIO',
        'Old Pirate Bay': 'OLDPIRATEBAY_RATIO',
        'Waffles.fm': 'WAFFLES_RATIO',
        'Mininova': 'MININOVA_RATIO',
    }

    option = ratio_options.get(provider)

    if option is None:
        return None

    seed_ratio = getattr(headphones.CONFIG, option)

    if seed_ratio is None:
        return None

    try:
        return float(seed_ratio)
    except (TypeError, ValueError):
        # Do not hand the unparsable value back to the caller, it would be
        # passed on to the torrent client as if it were a valid ratio.
        logger.warn("Could not get seed ratio for %s" % provider)
        return None
def searchforalbum(albumid=None, new=False, losslessOnly=False,
                   choose_specific_download=False):
    """
    Search for one album, or for all albums that are marked as wanted.

    Without `albumid`, every album with status "Wanted" or "Wanted Lossless"
    is searched; these searches are always done with `new` set to True, and
    lossless only when `losslessOnly` is set or the album itself is marked
    "Wanted Lossless". With `albumid`, only that album is searched, using
    `new` and `losslessOnly` as given.

    When `choose_specific_download` is set together with `albumid`, nothing
    is downloaded; the search results are returned instead. In all other
    cases None is returned.
    """

    logger.info('Searching for wanted albums')
    myDB = db.DBConnection()

    if not albumid:
        results = myDB.select('SELECT * from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')

        for album in results:
            if not album['AlbumTitle'] or not album['ArtistName']:
                logger.warn('Skipping release %s. No title available', album['AlbumID'])
                continue

            # Decide per album: assigning to `losslessOnly` here would make
            # every album after the first "Wanted Lossless" one lossless
            # only as well.
            album_lossless = losslessOnly or album['Status'] == "Wanted Lossless"

            logger.info('Searching for "%s - %s" since it is marked as wanted' % (album['ArtistName'], album['AlbumTitle']))
            do_sorted_search(album, True, album_lossless)

    elif albumid and choose_specific_download:
        album = myDB.action('SELECT * from albums WHERE AlbumID=?', [albumid]).fetchone()

        if not album:
            logger.warn('Cannot search for album %s. It is not in the database', albumid)
            return []

        logger.info('Searching for "%s - %s"' % (album['ArtistName'], album['AlbumTitle']))
        results = do_sorted_search(album, new, losslessOnly, choose_specific_download=True)
        return results

    else:
        album = myDB.action('SELECT * from albums WHERE AlbumID=?', [albumid]).fetchone()

        if not album:
            logger.warn('Cannot search for album %s. It is not in the database', albumid)
            return

        logger.info('Searching for "%s - %s" since it was marked as wanted' % (album['ArtistName'], album['AlbumTitle']))
        do_sorted_search(album, new, losslessOnly)

    logger.info('Search for wanted albums complete')
def do_sorted_search(album, new, losslessOnly, choose_specific_download=False):
    """
    Search usenet and/or torrents for `album` and snatch the best result.

    Depending on the PREFER_TORRENTS option, either usenet or torrents are
    searched first, and the other one is only searched when the first gave
    no results. For any other value of the option, or when
    `choose_specific_download` is set, both are searched and the results are
    combined.

    With `choose_specific_download` set, the results are returned as they
    are. Otherwise the results are filtered and sorted, and the best one
    that can be downloaded is sent to the downloader; None is returned.
    """

    config = headphones.CONFIG

    usenet_providers = (config.HEADPHONES_INDEXER or config.NEWZNAB or config.NZBSORG or config.OMGWTFNZBS)
    usenet_downloaders = (config.SAB_HOST or config.BLACKHOLE_DIR or config.NZBGET_HOST)
    torrent_providers = (config.KAT or config.PIRATEBAY or config.OLDPIRATEBAY or config.MININOVA or config.WAFFLES or config.RUTRACKER or config.WHATCD)

    # Total duration of the tracks of this album
    duration_rows = db.DBConnection().select('SELECT sum(TrackDuration) from tracks WHERE AlbumID=?', [album['AlbumID']])
    albumlength = duration_rows[0][0]

    found = []
    preference = config.PREFER_TORRENTS

    if not choose_specific_download and preference == 0:
        # Usenet first, torrents as fallback
        if usenet_providers and usenet_downloaders:
            found = searchNZB(album, new, losslessOnly, albumlength)

        if not found and torrent_providers:
            found = searchTorrent(album, new, losslessOnly, albumlength)

    elif not choose_specific_download and preference == 1:
        # Torrents first, usenet as fallback
        if torrent_providers:
            found = searchTorrent(album, new, losslessOnly, albumlength)

        if not found and usenet_providers and usenet_downloaders:
            found = searchNZB(album, new, losslessOnly, albumlength)

    else:
        # No preference, or the user wants to choose: search both
        from_usenet = None
        from_torrents = None

        if usenet_providers and usenet_downloaders:
            from_usenet = searchNZB(album, new, losslessOnly, albumlength, choose_specific_download)

        if torrent_providers:
            from_torrents = searchTorrent(album, new, losslessOnly, albumlength, choose_specific_download)

        found = (from_usenet or []) + (from_torrents or [])

    if choose_specific_download:
        return found

    # Filter all results that do not comply
    compliant = [entry for entry in found if entry[5]]

    # Sort the remaining results
    ranked = sort_search_results(compliant, album, new, albumlength)

    if ranked:
        logger.info(u"Making sure we can download the best result")
        data, bestqual = preprocess(ranked)

        if data and bestqual:
            send_to_downloader(data, bestqual, album)
def removeDisallowedFilenameChars(filename):
    """
    Return a lower case, ASCII only version of `filename` that consists
    solely of letters, digits, spaces and the characters `-_.()`.

    The name is NFKD normalized first and everything that cannot be encoded
    as ASCII afterwards is dropped.
    """
    allowed = "-_.() " + string.ascii_letters + string.digits
    ascii_name = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').lower()

    kept = [char for char in ascii_name if char in allowed]
    return ''.join(kept)
def more_filtering(results, album, albumlength, new):
    """
    Filter search results on title, size and download history.

    `results` are result tuples of which the title (index 0), size (1), URL
    (2) and provider (3) are used here. `albumlength` is the total duration
    of the album, used to derive size limits from the configured bitrates.
    A result is dropped when:

    * the album title contains the artist name, but the result title
      contains the artist name less than twice;
    * it is smaller than the lower size limit;
    * it is larger than the upper size limit, unless lossless is allowed
      and 'flac' is part of the title;
    * `new` is set and its URL is already in the snatched table.

    Returns the list of results that remain.
    """

    low_size_limit = None
    high_size_limit = None
    allow_lossless = False
    myDB = db.DBConnection()

    # Lossless - ignore results if target size outside bitrate range
    if headphones.CONFIG.PREFERRED_QUALITY == 3 and albumlength and (headphones.CONFIG.LOSSLESS_BITRATE_FROM or headphones.CONFIG.LOSSLESS_BITRATE_TO):
        if headphones.CONFIG.LOSSLESS_BITRATE_FROM:
            low_size_limit = albumlength / 1000 * int(headphones.CONFIG.LOSSLESS_BITRATE_FROM) * 128
        if headphones.CONFIG.LOSSLESS_BITRATE_TO:
            high_size_limit = albumlength / 1000 * int(headphones.CONFIG.LOSSLESS_BITRATE_TO) * 128

    # Preferred Bitrate - ignore results if target size outside % buffer
    elif headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE:
        logger.debug('Target bitrate: %s kbps' % headphones.CONFIG.PREFERRED_BITRATE)

        if albumlength:
            targetsize = albumlength / 1000 * int(headphones.CONFIG.PREFERRED_BITRATE) * 128
            logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize))

            if headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER:
                low_size_limit = targetsize * int(headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER) / 100
            if headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER:
                high_size_limit = targetsize * int(headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER) / 100
            # Only consulted below when a result exceeds the upper limit
            if headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS:
                allow_lossless = True

    if not results:
        return []

    # The album does not change per result, so normalize it only once
    normalizedAlbumArtist = removeDisallowedFilenameChars(album['ArtistName'])
    normalizedAlbumTitle = removeDisallowedFilenameChars(album['AlbumTitle'])
    artist_in_title = normalizedAlbumArtist in normalizedAlbumTitle

    newlist = []

    for result in results:
        # If the album title contains the artist name (e.g. a self-titled
        # album), the artist name should show up at least twice in the title
        # of the result.
        if artist_in_title:
            normalizedResultTitle = removeDisallowedFilenameChars(result[0])

            if normalizedResultTitle.count(normalizedAlbumArtist) < 2:
                continue

        if low_size_limit and (int(result[1]) < low_size_limit):
            logger.info("%s from %s is too small for this album - not considering it. (Size: %s, Minsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(low_size_limit))
            continue

        if high_size_limit and (int(result[1]) > high_size_limit):
            logger.info("%s from %s is too large for this album - not considering it. (Size: %s, Maxsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(high_size_limit))

            # Keep lossless results if there are no good lossy matches
            if not (allow_lossless and 'flac' in result[0].lower()):
                continue

        if new:
            alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [result[2]])

            if len(alreadydownloaded):
                logger.info('%s has already been downloaded from %s. Skipping.' % (result[0], result[3]))
                continue

        newlist.append(result)

    return newlist
def sort_search_results(resultlist, album, new, albumlength):
    """
    Sort search results so that the best result comes first.

    Every result gets a priority: 1 when its title (index 0) contains one of
    the preferred words, plus a fraction when its provider (index 3)
    contains a preferred word; the earlier that word is in the list of
    preferred words, the higher the fraction. Both checks ignore case.

    When a preferred bitrate is configured and the album length is known,
    results are sorted on priority first and then on how close their size
    (index 1) is to the target size. Results with 'flac' in the title are
    kept apart and only used, when allowed, if nothing else remains. In all
    other cases results are sorted on priority and then on size, largest
    first.

    Returns the sorted list. The items are tuples of the first five fields
    of the result followed by the priority, and in case of sorting on target
    size also by the difference to the target size. Returns None when there
    is nothing to return.
    """

    if new and not len(resultlist):
        logger.info('No more results found for: %s - %s' % (album['ArtistName'], album['AlbumTitle']))
        return None

    # Add a priority if it has any of the preferred words
    preferred_words = None

    if headphones.CONFIG.PREFERRED_WORDS:
        preferred_words = helpers.split_string(headphones.CONFIG.PREFERRED_WORDS)

    temp_list = []

    for result in resultlist:
        priority = 0

        if preferred_words:
            result_title = result[0].lower()
            result_provider = result[3].lower()

            if any(word.lower() in result_title for word in preferred_words):
                priority = 1

            # add a search provider priority (weighted based on position).
            # The word has to be lower cased as well, since it is compared
            # to the lower cased provider.
            position = next((index for index, word in enumerate(preferred_words) if word.lower() in result_provider), None)

            if position is not None:
                priority += round((len(preferred_words) - position) / float(len(preferred_words)), 2)

        temp_list.append((result[0], result[1], result[2], result[3], result[4], priority))

    resultlist = temp_list

    def by_priority_and_size(item):
        return (item[5], int(item[1]))

    if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE:
        try:
            # Without an album length there is no target size to aim for.
            # Check it explicitly, instead of provoking an exception that is
            # logged with a traceback.
            if albumlength:
                targetsize = albumlength / 1000 * int(headphones.CONFIG.PREFERRED_BITRATE) * 128
            else:
                targetsize = 0

            if not targetsize:
                logger.info('No track information for %s - %s. Defaulting to highest quality' % (album['ArtistName'], album['AlbumTitle']))
                finallist = sorted(resultlist, key=by_priority_and_size, reverse=True)
            else:
                newlist = []
                flac_list = []

                for result in resultlist:
                    # Add lossless results to the "flac list" which we can use if there are no good lossy matches
                    if 'flac' in result[0].lower():
                        flac_list.append((result[0], result[1], result[2], result[3], result[4], result[5]))
                        continue

                    delta = abs(targetsize - int(result[1]))
                    newlist.append((result[0], result[1], result[2], result[3], result[4], result[5], delta))

                # Highest priority first, then closest to the target size
                finallist = sorted(newlist, key=lambda item: (-item[5], item[6]))

                if not len(finallist) and len(flac_list) and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS:
                    logger.info("Since there were no appropriate lossy matches (and at least one lossless match, going to use lossless instead")
                    finallist = sorted(flac_list, key=by_priority_and_size, reverse=True)
        except Exception:
            logger.exception('Unhandled exception')
            logger.info('No track information for %s - %s. Defaulting to highest quality', album['ArtistName'], album['AlbumTitle'])
            finallist = sorted(resultlist, key=by_priority_and_size, reverse=True)
    else:
        finallist = sorted(resultlist, key=by_priority_and_size, reverse=True)

    if not len(finallist):
        logger.info('No appropriate matches found for %s - %s', album['ArtistName'], album['AlbumTitle'])
        return None

    return finallist
def get_year_from_release_date(release_date):
    """
    Return the first four characters of `release_date`, which is the year
    for dates that start with it. Returns an empty string when the release
    date cannot be sliced (for instance when it is None).
    """
    try:
        return release_date[:4]
    except TypeError:
        return ''
def _select_nzb_categories(album, losslessOnly, allow_lossless, lossless,
                           lossy, other, other_message):
    """
    Return the categories to search an NZB indexer in: `lossless`, `lossy`
    or both, depending on the preferred quality. For albums of type 'Other',
    `other_message` is logged and `other` is returned instead.
    """
    if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:
        categories = lossless
    elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:
        categories = lossless + "," + lossy
    else:
        categories = lossy

    if album['Type'] == 'Other':
        logger.info(other_message)
        categories = other

    return categories


def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_specific_download=False):
    """
    Search the enabled NZB providers for `album`.

    The search term is the search term stored with the album if there is
    one, otherwise it is built from the artist name and album title. Every
    enabled provider (Headphones Indexer, newznab hosts, nzbs.org and
    omgwtfnzbs) is queried, and each hit is collected as a tuple of title,
    size, URL, provider, 'nzb' and True.

    The hits are then filtered with `verifyresult`, and also with
    `more_filtering` unless `choose_specific_download` is set. Returns the
    list of remaining results.
    """

    reldate = album['ReleaseDate']
    year = get_year_from_release_date(reldate)

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', '.': '', ':': ''}

    cleanalbum = helpers.latinToAscii(helpers.replace_all(album['AlbumTitle'], dic)).strip()
    cleanartist = helpers.latinToAscii(helpers.replace_all(album['ArtistName'], dic)).strip()

    # Use the provided search term if available, otherwise build a search term
    if album['SearchTerm']:
        term = album['SearchTerm']
    # FLAC usually doesn't have a year for some reason so leave it out.
    # Various Artist albums might be listed as VA, so I'll leave that out too
    # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
    elif album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4:
        term = cleanartist + ' ' + cleanalbum + ' ' + year
    elif album['ArtistName'] == 'Various Artists':
        term = cleanalbum + ' ' + year
    else:
        term = cleanartist + ' ' + cleanalbum

    # Replace bad characters in the term and encode it as UTF-8
    term = re.sub(r'[\.\-\/]', ' ', term).encode('utf-8')
    artistterm = re.sub(r'[\.\-\/]', ' ', cleanartist).encode('utf-8')

    # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless
    allow_lossless = bool(
        headphones.CONFIG.PREFERRED_QUALITY == 2 and
        headphones.CONFIG.PREFERRED_BITRATE and
        headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and
        headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS)

    logger.debug("Using search term: %s" % term)

    resultlist = []
    headers = {'User-Agent': USER_AGENT}
    audiobook_message = "Album type is audiobook/spokenword. Using audiobook category"

    if headphones.CONFIG.HEADPHONES_INDEXER:
        provider = "headphones"
        categories = _select_nzb_categories(album, losslessOnly,
            allow_lossless, "3040", "3010", "3030", audiobook_message)

        # Request results
        logger.info('Parsing results from Headphones Indexer')

        # NOTE(review): the API key is hard-coded here - confirm that this
        # is intended.
        params = {
            "t": "search",
            "cat": categories,
            "apikey": '964d601959918a578a670984bdee9357',
            "maxage": headphones.CONFIG.USENET_RETENTION,
            "q": term
        }

        data = request.request_feed(
            url="http://indexer.codeshy.com/api",
            params=params, headers=headers,
            auth=(headphones.CONFIG.HPUSER, headphones.CONFIG.HPPASS)
        )

        # Process feed
        if data:
            if not len(data.entries):
                # Let the logger do the formatting: the term is a UTF-8
                # encoded string, so formatting it into a unicode string
                # here can raise a UnicodeDecodeError.
                logger.info(u"No results found from %s for %s", 'Headphones Index', term)
            else:
                for item in data.entries:
                    try:
                        url = item.link
                        title = item.title
                        size = int(item.links[1]['length'])

                        resultlist.append((title, size, url, provider, 'nzb', True))
                        logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
                    except Exception as e:
                        logger.error(u"An unknown error occurred trying to parse the feed: %s" % e)

    if headphones.CONFIG.NEWZNAB:
        newznab_hosts = []

        if headphones.CONFIG.NEWZNAB_HOST and headphones.CONFIG.NEWZNAB_ENABLED:
            newznab_hosts.append((headphones.CONFIG.NEWZNAB_HOST, headphones.CONFIG.NEWZNAB_APIKEY, headphones.CONFIG.NEWZNAB_ENABLED))

        for newznab_host in headphones.CONFIG.get_extra_newznabs():
            if newznab_host[2] == '1' or newznab_host[2] == 1:
                newznab_hosts.append(newznab_host)

        categories = _select_nzb_categories(album, losslessOnly,
            allow_lossless, "3040", "3010", "3030", audiobook_message)

        # Every word of the search term has to be part of the title
        term_words = [word.lower() for word in term.split()]

        for newznab_host in newznab_hosts:
            provider = newznab_host[0]

            # Add a little mod for kere.ws. Use a copy of the categories for
            # this host, so the extra categories are not sent to the hosts
            # that are searched after it.
            host_categories = categories

            if newznab_host[0] == "http://kere.ws":
                if categories == "3040":
                    host_categories = categories + ",4070"
                elif categories == "3040,3010":
                    host_categories = categories + ",4070,4010"
                elif categories == "3010":
                    host_categories = categories + ",4010"
                else:
                    host_categories = categories + ",4050"

            # Request results
            logger.info('Parsing results from %s', newznab_host[0])

            params = {
                "t": "search",
                "apikey": newznab_host[1],
                "cat": host_categories,
                "maxage": headphones.CONFIG.USENET_RETENTION,
                "q": term
            }

            data = request.request_feed(
                url=newznab_host[0] + '/api?',
                params=params, headers=headers
            )

            # Process feed
            if data:
                if not len(data.entries):
                    logger.info(u"No results found from %s for %s", newznab_host[0], term)
                else:
                    for item in data.entries:
                        try:
                            url = item.link
                            title = item.title
                            size = int(item.links[1]['length'])
                            lowered_title = title.lower()

                            if all(word in lowered_title for word in term_words):
                                logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
                                resultlist.append((title, size, url, provider, 'nzb', True))
                            else:
                                logger.info('Skipping %s, not all search term words found' % title)
                        except Exception as e:
                            logger.exception("An unknown error occurred trying to parse the feed: %s" % e)

    if headphones.CONFIG.NZBSORG:
        provider = "nzbsorg"
        categories = _select_nzb_categories(album, losslessOnly,
            allow_lossless, "3040", "3010", "3030", audiobook_message)

        # Request results
        logger.info('Requesting from nzbs.org')

        params = {
            "t": "search",
            "apikey": headphones.CONFIG.NZBSORG_HASH,
            "cat": categories,
            "maxage": headphones.CONFIG.USENET_RETENTION,
            "q": term
        }

        data = request.request_feed(
            url='http://beta.nzbs.org/api',
            params=params, headers=headers,
            timeout=5
        )

        logger.info('Parsing results from nzbs.org')

        # Process feed
        if data:
            if not len(data.entries):
                # See the Headphones Indexer above for why the logger does
                # the formatting.
                logger.info(u"No results found from nzbs.org for %s", term)
            else:
                for item in data.entries:
                    try:
                        url = item.link
                        title = item.title
                        size = int(item.links[1]['length'])

                        resultlist.append((title, size, url, provider, 'nzb', True))
                        logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
                    except Exception:
                        logger.exception("Unhandled exception while parsing feed")

    if headphones.CONFIG.OMGWTFNZBS:
        provider = "omgwtfnzbs"

        # This provider has its own category ids
        categories = _select_nzb_categories(album, losslessOnly,
            allow_lossless, "22", "7", "29",
            "Album type is audiobook/spokenword. Searching all music categories")

        # Request results
        logger.info('Parsing results from omgwtfnzbs')

        params = {
            "user": headphones.CONFIG.OMGWTFNZBS_UID,
            "api": headphones.CONFIG.OMGWTFNZBS_APIKEY,
            "catid": categories,
            "retention": headphones.CONFIG.USENET_RETENTION,
            "search": term
        }

        data = request.request_json(
            url='http://api.omgwtfnzbs.org/json/',
            params=params, headers=headers
        )

        # Parse response
        if data:
            if 'notice' in data:
                logger.info(u"No results returned from omgwtfnzbs: %s" % data['notice'])
            else:
                for item in data:
                    try:
                        url = item['getnzb']
                        title = item['release']
                        size = int(item['sizebytes'])

                        resultlist.append((title, size, url, provider, 'nzb', True))
                        logger.info('Found %s. Size: %s', title, helpers.bytes_to_mb(size))
                    except Exception:
                        logger.exception("Unhandled exception")

    # attempt to verify that this isn't a substring result
    # when looking for "Foo - Foo" we don't want "Foobar"
    # this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
    #
    # Also will filter flac & remix albums if not specifically looking for it
    # This code also checks the ignored words and required words
    results = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)]

    # Additional filtering for size etc
    if results and not choose_specific_download:
        results = more_filtering(results, album, albumlength, new)

    return results
def send_to_downloader(data, bestqual, album):
    """
    Send the best search result to the configured downloader, mark the
    album as snatched and send out the enabled notifications.

    `data` is the content of the NZB or torrent file. `bestqual` is the
    result tuple, of which the title (index 0), size (1), URL (2), provider
    (3) and kind (4) are used; kind 'nzb' is sent to the NZB downloader,
    anything else to the torrent downloader. `album` is the album row; its
    fields are read by name, and for the notifications by position (1, 2
    and 6).

    Returns None. When the result cannot be handed over to the downloader,
    the function returns before anything is stored in the database.
    """

    logger.info(u'Found best result from %s: <a href="%s">%s</a> - %s', bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1]))

    kind = bestqual[4]
    seed_ratio = None
    torrentid = None

    if kind == 'nzb':
        # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads
        folder_name = helpers.sab_sanitize_foldername(bestqual[0])

        if headphones.CONFIG.NZB_DOWNLOADER == 1:
            nzb = classes.NZBDataSearchResult()
            nzb.extraInfo.append(data)
            nzb.name = folder_name

            if not nzbget.sendNZB(nzb):
                return

        elif headphones.CONFIG.NZB_DOWNLOADER == 0:
            nzb = classes.NZBDataSearchResult()
            nzb.extraInfo.append(data)
            nzb.name = folder_name

            if not sab.sendNZB(nzb):
                return

            # If we sent the file to sab, we can check how it was renamed and insert that into the snatched table
            (replace_spaces, replace_dots) = sab.checkConfig()

            if replace_dots:
                folder_name = helpers.sab_replace_dots(folder_name)
            if replace_spaces:
                folder_name = helpers.sab_replace_spaces(folder_name)

        else:
            # Black hole
            nzb_name = folder_name + '.nzb'
            download_path = os.path.join(headphones.CONFIG.BLACKHOLE_DIR, nzb_name)

            try:
                prev = os.umask(headphones.UMASK)

                # Restore the umask in any case, otherwise a failed write
                # leaves it changed for the rest of the process.
                try:
                    with open(download_path, 'w') as fp:
                        fp.write(data)
                finally:
                    os.umask(prev)

                logger.info('File saved to: %s', nzb_name)
            except Exception as e:
                logger.error('Couldn\'t write NZB file: %s', e)
                return
    else:
        folder_name = '%s - %s [%s]' % (helpers.latinToAscii(album['ArtistName']).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(album['AlbumTitle']).encode('UTF-8').replace('/', '_'), get_year_from_release_date(album['ReleaseDate']))

        # Blackhole
        if headphones.CONFIG.TORRENT_DOWNLOADER == 0:

            # Get torrent name from .torrent, this is usually used by the torrent client as the folder name
            torrent_name = helpers.replace_illegal_chars(folder_name) + '.torrent'
            download_path = os.path.join(headphones.CONFIG.TORRENTBLACKHOLE_DIR, torrent_name)

            if bestqual[2].lower().startswith("magnet:"):
                if headphones.CONFIG.MAGNET_LINKS == 1:
                    # Let the operating system open the magnet link
                    try:
                        if headphones.SYS_PLATFORM == 'win32':
                            os.startfile(bestqual[2])
                        elif headphones.SYS_PLATFORM == 'darwin':
                            subprocess.Popen(["open", bestqual[2]], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                        else:
                            subprocess.Popen(["xdg-open", bestqual[2]], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

                        # Gonna just take a guess at this..... Is there a better way to find this out?
                        folder_name = bestqual[0]
                    except Exception as e:
                        logger.error("Error opening magnet link: %s" % str(e))
                        return
                elif headphones.CONFIG.MAGNET_LINKS == 2:
                    # Procedure adapted from CouchPotato
                    torrent_hash = calculate_torrent_hash(bestqual[2])

                    # Randomize list of services
                    services = TORRENT_TO_MAGNET_SERVICES[:]
                    random.shuffle(services)

                    for service in services:
                        torrent_data = request.request_content(service % torrent_hash)

                        if torrent_data and "torcache" in torrent_data:
                            if not torrent_to_file(download_path, torrent_data):
                                return

                            # Extract folder name from torrent
                            folder_name = read_torrent_name(download_path,
                                                            bestqual[0])

                            # Break for loop
                            break
                    else:
                        # No service succeeded
                        logger.warning("Unable to convert magnet with hash "
                                       "'%s' into a torrent file.", torrent_hash)
                        return
                else:
                    logger.error("Cannot save magnet link in blackhole. "
                                 "Please switch your torrent downloader to "
                                 "Transmission or uTorrent, or allow Headphones "
                                 "to open or convert magnet links")
                    return
            else:
                if bestqual[3] == "rutracker.org":
                    download_path, _ = rutracker.get_torrent(bestqual[2],
                        headphones.CONFIG.TORRENTBLACKHOLE_DIR)

                    if not download_path:
                        return
                else:
                    if not torrent_to_file(download_path, data):
                        return

                # Extract folder name from torrent
                folder_name = read_torrent_name(download_path, bestqual[0])

                if folder_name:
                    logger.info('Torrent folder name: %s' % folder_name)

        elif headphones.CONFIG.TORRENT_DOWNLOADER == 1:
            logger.info("Sending torrent to Transmission")

            # rutracker needs cookies to be set, pass the .torrent file instead of url
            if bestqual[3] == 'rutracker.org':
                file_or_url, torrentid = rutracker.get_torrent(bestqual[2])
            else:
                file_or_url = bestqual[2]

            torrentid = transmission.addTorrent(file_or_url)

            if not torrentid:
                logger.error("Error sending torrent to Transmission. Are you sure it's running?")
                return

            folder_name = transmission.getTorrentFolder(torrentid)

            if folder_name:
                logger.info('Torrent folder name: %s' % folder_name)
            else:
                logger.error('Torrent folder name could not be determined')
                return

            # remove temp .torrent file created above
            if bestqual[3] == 'rutracker.org':
                try:
                    shutil.rmtree(os.path.split(file_or_url)[0])
                except Exception:
                    logger.exception("Unhandled exception")

            # Set Seed Ratio
            seed_ratio = get_seed_ratio(bestqual[3])

            if seed_ratio is not None:
                transmission.setSeedRatio(torrentid, seed_ratio)

        else:  # if headphones.CONFIG.TORRENT_DOWNLOADER == 2:
            logger.info("Sending torrent to uTorrent")

            # rutracker needs cookies to be set, pass the .torrent file instead of url
            if bestqual[3] == 'rutracker.org':
                file_or_url, torrentid = rutracker.get_torrent(bestqual[2])
                folder_name, cacheid = utorrent.dirTorrent(torrentid)
                folder_name = os.path.basename(os.path.normpath(folder_name))
                utorrent.labelTorrent(torrentid)
            else:
                file_or_url = bestqual[2]
                torrentid = calculate_torrent_hash(file_or_url, data)
                folder_name = utorrent.addTorrent(file_or_url, torrentid)

            if folder_name:
                logger.info('Torrent folder name: %s' % folder_name)
            else:
                logger.error('Torrent folder name could not be determined')
                return

            # remove temp .torrent file created above
            if bestqual[3] == 'rutracker.org':
                try:
                    shutil.rmtree(os.path.split(file_or_url)[0])
                except Exception:
                    logger.exception("Unhandled exception")

            # Set Seed Ratio
            seed_ratio = get_seed_ratio(bestqual[3])

            if seed_ratio is not None:
                utorrent.setSeedRatio(torrentid, seed_ratio)

    myDB = db.DBConnection()
    myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [album['AlbumID']])
    myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Snatched", folder_name, kind])

    # Store the torrent id so we can check later if it's finished seeding and can be removed
    if seed_ratio is not None and seed_ratio != 0 and torrentid:
        myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Seed_Snatched", torrentid, kind])

    # notify
    artist = album[1]
    albumname = album[2]
    rgid = album[6]
    title = artist + ' - ' + albumname
    provider = bestqual[3]

    # Strip the scheme from providers that are identified by their URL
    if provider.startswith(("http://", "https://")):
        provider = provider.split("//")[1]

    name = folder_name if folder_name else None

    if headphones.CONFIG.GROWL_ENABLED and headphones.CONFIG.GROWL_ONSNATCH:
        logger.info(u"Sending Growl notification")
        growl = notifiers.GROWL()
        growl.notify(name, "Download started")
    if headphones.CONFIG.PROWL_ENABLED and headphones.CONFIG.PROWL_ONSNATCH:
        logger.info(u"Sending Prowl notification")
        prowl = notifiers.PROWL()
        prowl.notify(name, "Download started")
    if headphones.CONFIG.PUSHOVER_ENABLED and headphones.CONFIG.PUSHOVER_ONSNATCH:
        logger.info(u"Sending Pushover notification")
        pushover = notifiers.PUSHOVER()
        pushover.notify(name, "Download started")
    if headphones.CONFIG.PUSHBULLET_ENABLED and headphones.CONFIG.PUSHBULLET_ONSNATCH:
        logger.info(u"Sending PushBullet notification")
        pushbullet = notifiers.PUSHBULLET()
        pushbullet.notify(name + " has been snatched!", "Download started")
    if headphones.CONFIG.TWITTER_ENABLED and headphones.CONFIG.TWITTER_ONSNATCH:
        logger.info(u"Sending Twitter notification")
        twitter = notifiers.TwitterNotifier()
        twitter.notify_snatch(name)
    if headphones.CONFIG.NMA_ENABLED and headphones.CONFIG.NMA_ONSNATCH:
        logger.info(u"Sending NMA notification")
        nma = notifiers.NMA()
        nma.notify(snatched=name)
    if headphones.CONFIG.PUSHALOT_ENABLED and headphones.CONFIG.PUSHALOT_ONSNATCH:
        logger.info(u"Sending Pushalot notification")
        pushalot = notifiers.PUSHALOT()
        pushalot.notify(name, "Download started")
    if headphones.CONFIG.OSX_NOTIFY_ENABLED and headphones.CONFIG.OSX_NOTIFY_ONSNATCH:
        from headphones import cache
        c = cache.Cache()
        album_art = c.get_artwork_from_cache(None, rgid)
        logger.info(u"Sending OS X notification")
        osx_notify = notifiers.OSX_NOTIFY()
        osx_notify.notify(artist,
                          albumname,
                          'Snatched: ' + provider + '. ' + name,
                          image=album_art)
    if headphones.CONFIG.BOXCAR_ENABLED and headphones.CONFIG.BOXCAR_ONSNATCH:
        logger.info(u"Sending Boxcar2 notification")
        b2msg = 'From ' + provider + '<br></br>' + name
        boxcar = notifiers.BOXCAR()
        boxcar.notify('Headphones snatched: ' + title, b2msg, rgid)
    if headphones.CONFIG.EMAIL_ENABLED and headphones.CONFIG.EMAIL_ONSNATCH:
        logger.info(u"Sending Email notification")
        email = notifiers.Email()
        message = 'Snatched from ' + provider + '. ' + name
        email.notify(title, message)
def verifyresult(title, artistterm, term, lossless):
    """
    Decide whether a search result title is acceptable for a search term.

    The title is normalised by replacing '.', '-', '/' and '_' with spaces
    and is then run through a series of filters: unwanted remixes, unwanted
    FLAC releases, the configured ignored and required words, "clean"
    releases and, finally, a check that every word of the term shows up in
    the title as a whole word.

    title      -- title of the search result
    artistterm -- cleaned artist name (currently unused, kept so existing
                  callers keep working)
    term       -- the search term the result was found with
    lossless   -- True when a lossless release is explicitly wanted

    Returns True when the result should be kept, False otherwise.
    """
    title = re.sub(r'[\.\-\/\_]', ' ', title)
    title_lower = title.lower()
    term_lower = term.lower()
    search_flags = re.IGNORECASE | re.UNICODE

    # Filter out remix search results (if we're not looking for it)
    if 'remix' not in term_lower and 'remix' in title_lower:
        logger.info("Removed %s from results because it's a remix album and we're not looking for a remix album right now.", title)
        return False

    # Filter out FLAC if we're not specifically looking for it. The previous
    # test, `== (0 or '0')`, only ever compared against the string '0'.
    if headphones.CONFIG.PREFERRED_QUALITY in (0, '0') and 'flac' in title_lower and not lossless:
        logger.info("Removed %s from results because it's a lossless album and we're not looking for a lossless album right now.", title)
        return False

    if headphones.CONFIG.IGNORED_WORDS:
        for each_word in helpers.split_string(headphones.CONFIG.IGNORED_WORDS):
            if each_word.lower() in title_lower:
                logger.info("Removed '%s' from results because it contains ignored word: '%s'", title, each_word)
                return False

    if headphones.CONFIG.REQUIRED_WORDS:
        for each_word in helpers.split_string(headphones.CONFIG.REQUIRED_WORDS):
            # An entry of the form "a OR b" is satisfied by any alternative
            if ' OR ' in each_word:
                or_words = helpers.split_string(each_word, 'OR')
                if any(word.lower() in title_lower for word in or_words):
                    continue
                logger.info("Removed '%s' from results because it doesn't contain any of the required words in: '%s'", title, str(or_words))
                return False
            if each_word.lower() not in title_lower:
                logger.info("Removed '%s' from results because it doesn't contain required word: '%s'", title, each_word)
                return False

    if headphones.CONFIG.IGNORE_CLEAN_RELEASES:
        for each_word in ['clean', 'edited', 'censored']:
            logger.debug("Checking if '%s' is in search result: '%s'", each_word, title)
            if each_word in title_lower and each_word not in term_lower:
                logger.info("Removed '%s' from results because it contains clean album word: '%s'", title, each_word)
                return False

    def _in_title(candidate):
        # Whole-word match, so that "II" does not match inside "III"
        return re.search(r'(?:\W|^)+' + re.escape(candidate) + r'(?:\W|$)+', title, search_flags)

    # The flags have to be passed by keyword: the third positional argument
    # of re.split() is maxsplit, not flags.
    tokens = re.split(r'\W', term, flags=search_flags)

    for token in tokens:
        if not token:
            continue
        if token == 'Various' or token == 'Artists' or token == 'VA':
            continue
        if _in_title(token):
            continue

        # Fall back to the token without punctuation ...
        cleantoken = ''.join(c for c in token if c not in string.punctuation)
        if _in_title(cleantoken):
            continue

        # ... and to the token with look-alike characters substituted
        dumbtoken = helpers.replace_all(token, {'!': 'i', '$': 's'})
        if _in_title(dumbtoken):
            continue

        # None of the variants is present: the result misses a search word
        logger.info("Removed from results: %s (missing tokens: %s and %s)", title, token, cleantoken)
        return False

    return True
def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose_specific_download=False):
    """
    Search every enabled torrent provider for the given album.

    album        -- mapping that provides the keys 'AlbumID', 'ReleaseDate',
                    'AlbumTitle', 'ArtistName' and 'SearchTerm'
    new          -- passed through unchanged to more_filtering()
    losslessOnly -- when True, only lossless categories/formats are searched
    albumlength  -- passed through unchanged to more_filtering()
    choose_specific_download -- when True, more_filtering() is skipped

    Every provider appends tuples of the form
    (title, size, url, provider, 'torrent', match) to a common list. That
    list is filtered with verifyresult() and, unless
    choose_specific_download is set, handed to more_filtering(); the outcome
    is returned.
    """
    global gazelle  # persistent what.cd api object to reduce number of login attempts

    # rutracker login. rulogin is initialised first so that it is always
    # bound when the rutracker section below is reached.
    rulogin = False
    if headphones.CONFIG.RUTRACKER and album:
        rulogin = rutracker.login(headphones.CONFIG.RUTRACKER_USER, headphones.CONFIG.RUTRACKER_PASSWORD)
        if not rulogin:
            logger.info(u'Could not login to rutracker, search results will exclude this provider')

    albumid = album['AlbumID']
    reldate = album['ReleaseDate']

    year = get_year_from_release_date(reldate)

    # MERGE THIS WITH THE TERM CLEANUP FROM searchNZB
    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': ''}

    semi_cleanalbum = helpers.replace_all(album['AlbumTitle'], dic)
    cleanalbum = helpers.latinToAscii(semi_cleanalbum)
    semi_cleanartist = helpers.replace_all(album['ArtistName'], dic)
    cleanartist = helpers.latinToAscii(semi_cleanartist)

    # Use provided term if available, otherwise build our own (this code needs to be cleaned up since a lot
    # of these torrent providers are just using cleanartist/cleanalbum terms
    if album['SearchTerm']:
        term = album['SearchTerm']
    else:
        # FLAC usually doesn't have a year for some reason so I'll leave it out
        # Various Artist albums might be listed as VA, so I'll leave that out too
        # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
        if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4:
            term = cleanartist + ' ' + cleanalbum + ' ' + year
        elif album['ArtistName'] == 'Various Artists':
            term = cleanalbum + ' ' + year
        else:
            term = cleanartist + ' ' + cleanalbum

    # Save user search term
    if album['SearchTerm']:
        usersearchterm = term
    else:
        usersearchterm = ''

    semi_clean_artist_term = re.sub(r'[\.\-\/]', ' ', semi_cleanartist).encode('utf-8', 'replace')
    semi_clean_album_term = re.sub(r'[\.\-\/]', ' ', semi_cleanalbum).encode('utf-8', 'replace')
    # Replace bad characters in the term and unicode it
    term = re.sub(r'[\.\-\/]', ' ', term).encode('utf-8')
    artistterm = re.sub(r'[\.\-\/]', ' ', cleanartist).encode('utf-8', 'replace')
    albumterm = re.sub(r'[\.\-\/]', ' ', cleanalbum).encode('utf-8', 'replace')

    # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless
    if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE and headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS:
        allow_lossless = True
    else:
        allow_lossless = False

    logger.debug("Using search term: %s" % term)

    resultlist = []
    # Providers accept a result when minimumseeders < seeders
    minimumseeders = int(headphones.CONFIG.NUMBEROFSEEDERS) - 1

    def set_proxy(proxy_url):
        # Make sure the proxy URL has a scheme and no trailing slash
        if not proxy_url.startswith('http'):
            proxy_url = 'http://' + proxy_url
        if proxy_url.endswith('/'):
            proxy_url = proxy_url[:-1]

        return proxy_url

    if headphones.CONFIG.KAT:
        provider = "Kick Ass Torrents"
        ka_term = term.replace("!", "")

        # Use proxy if specified
        if headphones.CONFIG.KAT_PROXY_URL:
            providerurl = fix_url(set_proxy(headphones.CONFIG.KAT_PROXY_URL))
        else:
            providerurl = fix_url("https://kickass.to")

        # Build URL
        providerurl = providerurl + "/usearch/" + ka_term

        # Pick category for torrents
        if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:
            format = "2"  # FLAC
            maxsize = 10000000000
        elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:
            format = "10"  # MP3 and FLAC
            maxsize = 10000000000
        else:
            format = "8"  # MP3 only
            maxsize = 300000000

        # Requesting content
        logger.info("Searching KAT using term: %s", ka_term)

        params = {
            "categories[0]": "music",
            "field": "seeders",
            "sorder": "desc",
            "rss": "1"
        }

        data = request.request_feed(url=providerurl, params=params)

        # Process feed
        if data:
            if not len(data.entries):
                logger.info("No results found")
            else:
                for item in data.entries:
                    try:
                        rightformat = True
                        title = item['title']
                        seeders = item['torrent_seeds']
                        url = item['links'][1]['href']
                        size = int(item['links'][1]['length'])

                        # For FLAC-only searches, look inside the torrent
                        # for file extensions
                        if format == "2":
                            torrent = request.request_content(url)
                            if not torrent or (int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1):
                                rightformat = False

                        if rightformat and size < maxsize and minimumseeders < int(seeders):
                            match = True
                            logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
                        else:
                            match = False
                            logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat)

                        resultlist.append((title, size, url, provider, 'torrent', match))
                    except Exception:
                        logger.exception("Unhandled exception in the KAT parser")

    if headphones.CONFIG.WAFFLES:
        provider = "Waffles.fm"
        providerurl = fix_url("https://www.waffles.fm/browse.php")

        bitrate = None
        if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:
            format = "FLAC"
            bitrate = "(Lossless)"
            maxsize = 10000000000
        elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:
            format = "FLAC OR MP3"
            maxsize = 10000000000
        else:
            format = "MP3"
            maxsize = 300000000

        if not usersearchterm:
            query_items = ['artist:"%s"' % artistterm,
                           'album:"%s"' % albumterm,
                           'year:(%s)' % year]
        else:
            query_items = [usersearchterm]

        query_items.extend(['format:(%s)' % format,
                            'size:[0 TO %d]' % maxsize,
                            '-seeders:0'])  # cut out dead torrents

        if bitrate:
            query_items.append('bitrate:"%s"' % bitrate)

        # Requesting content
        logger.info('Parsing results from Waffles')

        params = {
            "uid": headphones.CONFIG.WAFFLES_UID,
            "passkey": headphones.CONFIG.WAFFLES_PASSKEY,
            "rss": "1",
            "c0": "1",
            "s": "seeders",  # sort by
            "d": "desc",  # direction
            "q": " ".join(query_items)
        }

        data = request.request_feed(
            url=providerurl,
            params=params,
            timeout=20
        )

        # Process feed
        if data:
            if not len(data.entries):
                logger.info(u"No results found from %s for %s", provider, term)
            else:
                for item in data.entries:
                    try:
                        title = item.title
                        desc_match = re.search(r"Size: (\d+)<", item.description)
                        size = int(desc_match.group(1))
                        url = item.link
                        resultlist.append((title, size, url, provider, 'torrent', True))
                        logger.info('Found %s. Size: %s', title, helpers.bytes_to_mb(size))
                    except Exception as e:
                        logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s", e)

    # rutracker.org
    if headphones.CONFIG.RUTRACKER and rulogin:
        provider = "rutracker.org"

        # Ignore if release date not specified, results too unpredictable
        if not year and not usersearchterm:
            logger.info(u'Release date not specified, ignoring for rutracker.org')
        else:
            if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:
                format = 'lossless'
                maxsize = 10000000000
            elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:
                format = 'lossless+mp3'
                maxsize = 10000000000
            else:
                format = 'mp3'
                maxsize = 300000000

            # build search url based on above
            if not usersearchterm:
                searchURL = rutracker.searchurl(artistterm, albumterm, year, format)
            else:
                searchURL = rutracker.searchurl(usersearchterm, ' ', ' ', format)

            logger.info(u'Parsing results from <a href="%s">rutracker.org</a>' % searchURL)

            # parse results and get best match
            rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid)

            # add best match to overall results list
            if rulist:
                for ru in rulist:
                    title = ru[0].decode('utf-8')
                    size = ru[1]
                    url = ru[2]
                    resultlist.append((title, size, url, provider, 'torrent', True))
                    logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
            else:
                logger.info(u"No valid results found from %s" % (provider))

    if headphones.CONFIG.WHATCD:
        provider = "What.cd"

        bitrate = None
        bitrate_string = bitrate

        if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:  # Lossless Only mode
            search_formats = [gazelleformat.FLAC]
            maxsize = 10000000000
        elif headphones.CONFIG.PREFERRED_QUALITY == 2:  # Preferred quality mode
            search_formats = [None]  # should return all
            bitrate = headphones.CONFIG.PREFERRED_BITRATE
            if bitrate:
                # The last encoding that matches the preferred bitrate wins
                for encoding_string in gazelleencoding.ALL_ENCODINGS:
                    if re.search(bitrate, encoding_string, flags=re.I):
                        bitrate_string = encoding_string
                if bitrate_string not in gazelleencoding.ALL_ENCODINGS:
                    logger.info(u"Your preferred bitrate is not one of the available What.cd filters, so not using it as a search parameter.")
            maxsize = 10000000000
        elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:  # Highest quality including lossless
            search_formats = [gazelleformat.FLAC, gazelleformat.MP3]
            maxsize = 10000000000
        else:  # Highest quality excluding lossless
            search_formats = [gazelleformat.MP3]
            maxsize = 300000000

        if not gazelle or not gazelle.logged_in():
            try:
                logger.info(u"Attempting to log in to What.cd...")
                gazelle = gazelleapi.GazelleAPI(headphones.CONFIG.WHATCD_USERNAME, headphones.CONFIG.WHATCD_PASSWORD)
                gazelle._login()
            except Exception as e:
                gazelle = None
                logger.error(u"What.cd credentials incorrect or site is down. Error: %s %s" % (e.__class__.__name__, str(e)))

        if gazelle and gazelle.logged_in():
            logger.info(u"Searching %s..." % provider)
            all_torrents = []
            for search_format in search_formats:
                all_torrents.extend(gazelle.search_torrents(artistname=semi_clean_artist_term,
                                                            groupname=semi_clean_album_term,
                                                            format=search_format, encoding=bitrate_string)['results'])

            # filter on format, size, and num seeders
            # NOTE(review): this uses >= minimumseeders while the other
            # providers use minimumseeders < seeders; confirm whether the
            # one-seeder difference is intended.
            logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...")
            match_torrents = [t for t in all_torrents if t.size <= maxsize and t.seeders >= minimumseeders]

            logger.info(u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents))

            # sort by times d/l'd
            if not len(match_torrents):
                logger.info(u"No results found from %s for %s after filtering" % (provider, term))
            elif len(match_torrents) > 1:
                logger.info(u"Found %d matching releases from %s for %s - %s after filtering" %
                            (len(match_torrents), provider, artistterm, albumterm))
                logger.info("Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string)
                match_torrents.sort(key=lambda x: int(x.snatched), reverse=True)
                if gazelleformat.MP3 in search_formats:
                    # sort by size after rounding to nearest 10MB...hacky, but will favor highest quality
                    match_torrents.sort(key=lambda x: int(10 * round(x.size / 1024. / 1024. / 10.)), reverse=True)
                if search_formats and None not in search_formats:
                    match_torrents.sort(key=lambda x: int(search_formats.index(x.format)))  # prefer lossless
                logger.info(u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents))

            for torrent in match_torrents:
                if not torrent.file_path:
                    torrent.group.update_group_data()  # will load the file_path for the individual torrents
                resultlist.append((torrent.file_path,
                                   torrent.size,
                                   gazelle.generate_torrent_link(torrent.id),
                                   provider,
                                   'torrent', True))

    # Pirate Bay
    if headphones.CONFIG.PIRATEBAY:
        provider = "The Pirate Bay"
        tpb_term = term.replace("!", "")

        # Use proxy if specified
        if headphones.CONFIG.PIRATEBAY_PROXY_URL:
            providerurl = fix_url(set_proxy(headphones.CONFIG.PIRATEBAY_PROXY_URL))
        else:
            providerurl = fix_url("https://thepiratebay.se")

        # Build URL
        providerurl = providerurl + "/search/" + tpb_term + "/0/7/"  # 7 is sort by seeders

        # Pick category for torrents
        if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:
            category = '104'  # FLAC
            maxsize = 10000000000
        elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:
            category = '100'  # General audio category
            maxsize = 10000000000
        else:
            category = '101'  # MP3 only
            maxsize = 300000000

        # Request content
        logger.info("Searching The Pirate Bay using term: %s", tpb_term)
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'}
        data = request.request_soup(url=providerurl + category, headers=headers)

        # Process content
        if data:
            rows = data.select('table tbody tr')

            if not rows:
                logger.info("No results found")
            else:
                for item in rows:
                    try:
                        url = None
                        title = ''.join(item.find("a", {"class": "detLink"}))
                        seeds = int(''.join(item.find("td", {"align": "right"})))

                        if headphones.CONFIG.TORRENT_DOWNLOADER == 0:
                            try:
                                url = item.find("a", {"title": "Download this torrent"})['href']
                            except TypeError:
                                # No .torrent link in this row; fall back to
                                # the magnet link if magnets are allowed
                                if headphones.CONFIG.MAGNET_LINKS != 0:
                                    url = item.findAll("a")[3]['href']
                                else:
                                    logger.info('"%s" only has a magnet link, skipping' % title)
                                    continue
                        else:
                            url = item.findAll("a")[3]["href"]

                        # Protocol-relative links need a scheme
                        if url.lower().startswith("//"):
                            url = "http:" + url

                        formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace(u'\xa0', ' ')
                        size = helpers.piratesize(formatted_size)

                        if size < maxsize and minimumseeders < seeds and url is not None:
                            match = True
                            logger.info('Found %s. Size: %s' % (title, formatted_size))
                        else:
                            match = False
                            logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds)))

                        resultlist.append((title, size, url, provider, "torrent", match))
                    except Exception as e:
                        logger.error(u"An unknown error occurred in the Pirate Bay parser: %s" % e)

    # Old Pirate Bay Compatible
    if headphones.CONFIG.OLDPIRATEBAY:
        provider = "Old Pirate Bay"
        tpb_term = term.replace("!", "")

        # Pick category for torrents
        if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:
            maxsize = 10000000000
        elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:
            maxsize = 10000000000
        else:
            maxsize = 300000000

        # Requesting content
        logger.info("Parsing results from Old Pirate Bay")

        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'}
        provider_url = fix_url(headphones.CONFIG.OLDPIRATEBAY_URL) + \
            "/search.php?" + urllib.urlencode({"q": tpb_term, "iht": 6})

        data = request.request_soup(url=provider_url, headers=headers)

        # Process content
        if data:
            rows = data.select('table tbody tr')

            if not rows:
                logger.info("No results found")
            else:
                for item in rows:
                    try:
                        links = item.select("td.title-row a")

                        title = links[1].text
                        seeds = int(item.select("td.seeders-row")[0].text)
                        url = links[0]["href"]  # Magnet link. The actual download link is not based on the URL

                        formatted_size = item.select("td.size-row")[0].text
                        size = helpers.piratesize(formatted_size)

                        if size < maxsize and minimumseeders < seeds and url is not None:
                            match = True
                            logger.info('Found %s. Size: %s' % (title, formatted_size))
                        else:
                            match = False
                            logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds)))

                        resultlist.append((title, size, url, provider, "torrent", match))
                    except Exception as e:
                        logger.error(u"An unknown error occurred in the Old Pirate Bay parser: %s" % e)

    # Mininova
    if headphones.CONFIG.MININOVA:
        provider = "Mininova"
        providerurl = fix_url("http://www.mininova.org/rss/" + term + "/5")

        if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly:
            # categories = "7" #music
            format = "2"  # flac
            maxsize = 10000000000
        elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless:
            # categories = "7" #music
            format = "10"  # mp3+flac
            maxsize = 10000000000
        else:
            # categories = "7" #music
            format = "8"  # mp3
            maxsize = 300000000

        # Requesting content
        logger.info('Parsing results from Mininova')

        data = request.request_feed(
            url=providerurl,
            timeout=20
        )

        # Process feed
        if data:
            if not len(data.entries):
                logger.info(u"No results found from %s for %s" % (provider, term))
            else:
                for item in data.entries:
                    try:
                        rightformat = True
                        title = item.title

                        # Read the number that follows "Ratio: " as an
                        # integer. It used to be collected character by
                        # character into a string, which made the seeder
                        # comparison below meaningless and could loop
                        # forever when no space followed. A missing value
                        # counts as zero seeders.
                        ratio_match = re.search(r"Ratio: (\d+)", item.description)
                        seeds = int(ratio_match.group(1)) if ratio_match else 0

                        url = item.links[1]['url']
                        size = int(item.links[1]['length'])

                        # For FLAC-only searches, look inside the torrent
                        # for file extensions
                        if format == "2":
                            torrent = request.request_content(url)
                            if not torrent or (int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1):
                                rightformat = False

                        if rightformat and size < maxsize and minimumseeders < seeds:
                            match = True
                            logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
                        else:
                            match = False
                            logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, seeds, rightformat))

                        resultlist.append((title, size, url, provider, 'torrent', match))
                    except Exception:
                        logger.exception("Unhandled exception in Mininova Parser")

    # attempt to verify that this isn't a substring result
    # when looking for "Foo - Foo" we don't want "Foobar"
    # this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
    results = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)]

    # Additional filtering for size etc
    if results and not choose_specific_download:
        results = more_filtering(results, album, albumlength, new)

    return results
# THIS IS KIND OF A MESS AND PROBABLY NEEDS TO BE CLEANED UP
def preprocess(resultlist):
    """
    Fetch the download payload for the first entry of ``resultlist``.

    Entries are indexed positionally: [2] is the download URL, [3] the
    provider name and [4] the result kind. Only the first entry is ever
    looked at, because every branch returns.

    Returns a ``(data, entry)`` tuple. ``data`` is ``True`` when nothing
    needs to be downloaded here (torrent downloader setting 1, rutracker
    results, magnet links); otherwise it is whatever
    ``request.request_content`` returns. An empty ``resultlist`` yields
    ``None``.
    """
    browser_agent = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'

    for entry in resultlist:
        if entry[4] != 'torrent':
            # Non-torrent result: plain download, with credentials when the
            # provider is 'headphones'
            fetch_args = {'url': entry[2], 'headers': {'User-Agent': USER_AGENT}}
            if entry[3] == 'headphones':
                fetch_args['auth'] = (headphones.CONFIG.HPUSER, headphones.CONFIG.HPPASS)
            return request.request_content(**fetch_args), entry

        # No download needed for downloader setting 1, for rutracker
        # results, or when the link is a magnet link
        if (headphones.CONFIG.TORRENT_DOWNLOADER == 1
                or entry[3] == 'rutracker.org'
                or entry[2].lower().startswith("magnet:")):
            return True, entry

        # Download the torrent file, using provider-specific headers
        headers_by_provider = {
            'Kick Ass Torrents': {'Referer': 'http://kat.ph/'},
            'What.cd': {'User-Agent': 'Headphones'},
            "The Pirate Bay": {'User-Agent': browser_agent},
            "Old Pirate Bay": {'User-Agent': browser_agent},
        }
        torrent_headers = dict(headers_by_provider.get(entry[3], {}))
        return request.request_content(url=entry[2], headers=torrent_headers), entry