mirror of
https://github.com/rembo10/headphones.git
synced 2026-05-24 12:27:45 +01:00
298 lines
11 KiB
Python
298 lines
11 KiB
Python
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
|
|
# Headphones rutracker.org search
|
|
# Functions called from searcher.py
|
|
|
|
import urllib
|
|
import urllib2
|
|
import cookielib
|
|
from urlparse import urlparse
|
|
from bs4 import BeautifulSoup
|
|
import headphones
|
|
from headphones import logger, db
|
|
import lib.bencode as bencode
|
|
import os
|
|
from tempfile import mkdtemp
|
|
|
|
class Rutracker(object):
    """Search client for the rutracker.org torrent tracker.

    Every request goes through one cookie-aware urllib2 opener, so cookies
    received by ``login`` are sent with the later search and download
    requests made through the same instance.
    """

    # Class-level default; ``login`` stores the per-instance result.
    logged_in = False

    def __init__(self):
        """Create the cookie jar and the opener that uses it."""
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))
        # install_opener makes this opener the default for urllib2.urlopen
        # in the whole process, not just for this instance.
        urllib2.install_opener(self.opener)

    def _torrent_id(self, url):
        """Return the value of the ``t`` query parameter of *url*, or None.

        Query parts without ``=`` are ignored instead of raising; when ``t``
        occurs more than once the last occurrence wins.
        """
        if not url:
            return None
        query = urlparse(url)[4]
        params = dict(part.split('=', 1) for part in query.split('&') if '=' in part)
        return params.get('t')

    def _set_download_cookie(self, torrent_id):
        """Put a ``bb_dl`` cookie carrying *torrent_id* into the cookie jar."""
        # NOTE(review): presumably the tracker's download endpoint requires
        # this cookie; that cannot be confirmed from this file.
        self.cookiejar.set_cookie(cookielib.Cookie(
            version=0, name='bb_dl', value=torrent_id, port=None,
            port_specified=False, domain='.rutracker.org',
            domain_specified=False, domain_initial_dot=False, path='/',
            path_specified=True, secure=False, expires=None, discard=True,
            comment=None, comment_url=None, rest={'HttpOnly': None},
            rfc2109=False))

    def _count_log_tracks(self, topicurl):
        """Count the tracks listed in the rip-log TOC table(s) on a topic page.

        Returns the sum over all TOC tables found, or 0 when the page cannot
        be fetched or contains no TOC.
        """
        try:
            page = self.opener.open(topicurl, timeout=60)
            try:
                soup = BeautifulSoup(page.read())
            finally:
                page.close()
        except Exception as e:
            logger.error('Error getting torrent topic page: %s' % e)
            return 0

        findtoc = soup.find_all(text='TOC of the extracted CD')
        if not findtoc:
            findtoc = soup.find_all(text='TOC извлечённого CD')

        total = 0
        for toc in findtoc:
            logcount = 0
            for toccontent in toc.find_all_next(text=True):
                # The first '|'-separated field of a text node is treated as
                # the track number column.
                first_field = toccontent.split('|')[0].strip()
                if first_field == '1' or first_field == '01':
                    # (Re)start counting at track one.
                    logcount = 1
                elif logcount > 0:
                    if first_field.isdigit():
                        logcount += 1
                    else:
                        # First non-numeric field after the numbering started
                        # ends this table.
                        break
            total += logcount
        return total

    def login(self, login, password):
        """Implements tracker login procedure.

        Posts the credentials to the tracker login form, then looks for a
        ``bb_data`` cookie in the cookie jar.

        Returns True when that cookie is present after the attempt, False
        otherwise (including when either credential is None). The result is
        also stored in ``self.logged_in``.
        """
        self.logged_in = False

        if login is None or password is None:
            return False

        params = urllib.urlencode({"login_username": login,
                                   "login_password": password,
                                   "login": "Вход"})

        try:
            response = self.opener.open("http://login.rutracker.org/forum/login.php", params)
            response.close()
        except Exception as e:
            # Best effort: a failed request just leaves us logged out, but
            # say so instead of hiding it.
            logger.error('Error logging in to rutracker.org: %s' % e)

        # Check if we're logged in
        self.logged_in = any(cookie.name == 'bb_data' for cookie in self.cookiejar)
        return self.logged_in

    def searchurl(self, artist, album, year, format):
        """Return the search url.

        artist -- artist name; left out of the query for 'Various Artists'.
        album  -- album title.
        year   -- release year as a string; None is treated as empty.
        format -- 'lossless', 'lossless+mp3', or anything else for the
                  mp3/aac-only query.
        """
        terms = []
        if artist != 'Various Artists':
            terms.append(artist)
        terms.append(album)
        terms.append(year if year is not None else '')
        searchterm = ' '.join(terms)

        # urllib.quote raises KeyError for unicode strings that contain
        # non-ASCII characters, so hand it bytes.
        # NOTE(review): UTF-8 is assumed here; confirm the query encoding the
        # tracker expects.
        if not isinstance(searchterm, str):
            searchterm = searchterm.encode('utf-8')

        providerurl = "http://rutracker.org/forum/tracker.php"

        if format == 'lossless':
            formatterm = '+lossless'
        elif format == 'lossless+mp3':
            formatterm = '+lossless||mp3||aac'
        else:
            formatterm = '+mp3||aac'

        # sort by size, descending.
        sort = '&o=7&s=2'

        return "%s?nm=%s%s%s" % (providerurl, urllib.quote(searchterm), formatterm, sort)

    def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
        """Parse the search results and return valid torrent list.

        searchurl  -- url of the results page, as built by ``searchurl``.
        maxsize    -- largest accepted torrent size (compared with the size
                      scraped from the page, which is logged as bytes).
        minseeders -- smallest accepted seeder count.
        albumid    -- AlbumID used to look up the expected track count in the
                      headphones ``tracks`` table.
        bitrate    -- not used by this method.

        A candidate is accepted when its audio-file count equals the
        headphones track count, or exceeds it and the title contains one of
        the deluxe/edition keywords. Candidates that cannot be read,
        downloaded or decoded are logged and skipped.

        Returns a list of ``(title, size, topicurl)`` tuples (possibly empty),
        or False when no results were scraped or the album has no tracks in
        the headphones database.
        """
        titles = []
        urls = []
        seeders = []
        sizes = []
        rulist = []

        try:
            page = self.opener.open(searchurl, timeout=60)
            try:
                soup = BeautifulSoup(page.read())
            finally:
                page.close()

            # Title
            for link in soup.find_all('a', attrs={'class': 'med tLink hl-tags bold'}):
                titles.append(link.get_text())

            # Download URL
            for link in soup.find_all('a', attrs={'class': 'small tr-dl dl-stub'}):
                urls.append(link.get('href'))

            # Seeders
            for link in soup.find_all('b', attrs={'class': 'seedmed'}):
                seeders.append(link.get_text())

            # Size
            for link in soup.find_all('td', attrs={'class': 'row4 small nowrap tor-size'}):
                sizes.append(link.u.string)

        except Exception as e:
            # Whatever was scraped before the failure is still used below.
            logger.error('Error reading rutracker.org search results: %s' % e)

        # Combine lists. The four columns are scraped independently and
        # paired by position; zip stops at the shortest one.
        torrentlist = list(zip(titles, urls, seeders, sizes))

        # return if nothing found
        if not torrentlist:
            return False

        # get headphones track count for album, return if not found
        myDB = db.DBConnection()
        tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid])
        hptrackcount = len(tracks)

        if not hptrackcount:
            logger.info('headphones track info not found, cannot compare to torrent')
            return False

        # Return all valid entries, ignored, required words now checked in searcher.py
        formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif']
        deluxelist = ['deluxe', 'edition', 'japanese', 'exclusive']
        audio_suffixes = tuple('.' + x.lower() for x in formatlist)

        for rawtitle, url, seeders_text, size in torrentlist:

            returntitle = rawtitle.encode('utf-8')
            title = returntitle.lower()

            try:
                size_value = int(size)
                seeder_count = int(seeders_text)
            except (TypeError, ValueError):
                logger.info('Could not read size or seeders for %s, skipping' % returntitle)
                continue

            if not (size_value <= maxsize and seeder_count >= minseeders):
                logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (returntitle, size_value, seeder_count))
                continue

            # Check torrent info
            torrent_id = self._torrent_id(url)
            if torrent_id is None:
                logger.info('No torrent id in download url for %s, skipping' % returntitle)
                continue
            self._set_download_cookie(torrent_id)

            try:
                page = self.opener.open(url)
                try:
                    payload = page.read()
                finally:
                    page.close()
                if not payload:
                    # Without this check the track counting below would run
                    # on missing or stale metainfo.
                    raise ValueError('empty response')
                metainfo = bencode.bdecode(payload)['info']
            except Exception as e:
                # Skip only this candidate so results that were already
                # validated are not thrown away.
                logger.error('Error getting torrent: %s' % e)
                continue

            # get torrent track count and check for cue
            trackcount = 0
            cuecount = 0

            if 'files' in metainfo:  # multi
                for pathfile in metainfo['files']:
                    for component in pathfile['path']:
                        lowered = component.lower()
                        if lowered.endswith(audio_suffixes):
                            trackcount += 1
                        if '.cue' in lowered:
                            cuecount += 1

            # Torrent topic page
            topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id
            logger.debug('torrent title: %s' % title)
            logger.debug('headphones trackcount: %s' % hptrackcount)
            logger.debug('rutracker trackcount: %s' % trackcount)

            # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s)
            # This is for the case where we have a single .flac/.wav which can be split by cue
            # Not great, but shouldn't be doing this too often
            totallogcount = 0
            if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
                totallogcount = self._count_log_tracks(topicurl)

            if totallogcount > 0:
                trackcount = totallogcount
                logger.debug('rutracker logtrackcount: %s' % totallogcount)

            # If torrent track count = hp track count then return torrent,
            # if greater, check for deluxe/special/foreign editions
            valid = False
            if trackcount == hptrackcount:
                valid = True
            elif trackcount > hptrackcount:
                if any(deluxe in title for deluxe in deluxelist):
                    valid = True

            # Add to list
            if valid:
                rulist.append((returntitle, size, topicurl))
            else:
                logger.info(u'<a href="%s">Torrent</a> found with %s tracks but the selected headphones release has %s tracks, skipping for rutracker.org' % (topicurl, trackcount, hptrackcount))

        return rulist

    def get_torrent(self, url, savelocation=None):
        """Download the .torrent file referenced by *url*.

        url          -- any url whose query string carries the torrent id in
                        the ``t`` parameter.
        savelocation -- directory to write ``<id>.torrent`` into; when empty
                        a fresh temporary directory is created instead.

        Returns the path of the written file, or False on any error.
        """
        torrent_id = self._torrent_id(url)
        if torrent_id is None:
            logger.error('Error getting torrent: no torrent id in url %s' % url)
            return False

        self._set_download_cookie(torrent_id)
        downloadurl = 'http://dl.rutracker.org/forum/dl.php?t=' + torrent_id
        torrent_name = torrent_id + '.torrent'

        prev = None
        try:
            prev = os.umask(headphones.UMASK)
            page = self.opener.open(downloadurl)
            try:
                torrent = page.read()
            finally:
                page.close()

            if savelocation:
                download_path = os.path.join(savelocation, torrent_name)
            else:
                tempdir = mkdtemp(suffix='_rutracker_torrents')
                download_path = os.path.join(tempdir, torrent_name)

            with open(download_path, 'wb') as fp:
                fp.write(torrent)
        except Exception as e:
            logger.error('Error getting torrent: %s' % e)
            return False
        finally:
            # Restore the process umask on failure as well as on success.
            if prev is not None:
                os.umask(prev)

        return download_path
|