From 8ab19f8121344651aeb9a643d76da89664a22e44 Mon Sep 17 00:00:00 2001 From: Ade Date: Sat, 10 May 2014 19:06:09 +1200 Subject: [PATCH] remove rutracker track count check should be able to filter out unwanted entries with Quality and Search Word settings --- headphones/searcher_rutracker.py | 220 +++++++++++++++---------------- 1 file changed, 107 insertions(+), 113 deletions(-) diff --git a/headphones/searcher_rutracker.py b/headphones/searcher_rutracker.py index 8837d205..3c799f24 100644 --- a/headphones/searcher_rutracker.py +++ b/headphones/searcher_rutracker.py @@ -18,6 +18,7 @@ from tempfile import mkdtemp class Rutracker(): logged_in = False + # Stores a number of login attempts to prevent recursion. #login_counter = 0 @@ -29,7 +30,7 @@ class Rutracker(): def login(self, login, password): """Implements tracker login procedure.""" - + self.logged_in = False if login is None or password is None: @@ -51,7 +52,6 @@ class Rutracker(): pass # Check if we're logged in - for cookie in self.cookiejar: if cookie.name == 'bb_data': self.logged_in = True @@ -64,7 +64,6 @@ class Rutracker(): """ # Build search url - searchterm = '' if artist != 'Various Artists': searchterm = artist @@ -82,8 +81,7 @@ class Rutracker(): else: format = '+mp3||aac' - # sort by size, descending. - + # sort by size, descending. sort = '&o=7&s=2' searchurl = "%s?nm=%s%s%s" % (providerurl, urllib.quote(searchterm), format, sort) @@ -111,25 +109,21 @@ class Rutracker(): #logger.debug (soup.prettify()) # Title - for link in soup.find_all('a', attrs={'class' : 'med tLink hl-tags bold'}): title = link.get_text() titles.append(title) # Download URL - for link in soup.find_all('a', attrs={'class' : 'small tr-dl dl-stub'}): url = link.get('href') urls.append(url) # Seeders - for link in soup.find_all('b', attrs={'class' : 'seedmed'}): seeder = link.get_text() seeders.append(seeder) # Size - for link in soup.find_all('td', attrs={'class' : 'row4 small nowrap tor-size'}): size = link.u.string sizes.append(size) @@ -138,30 +132,33 @@ class Rutracker(): pass # Combine lists - torrentlist = zip(titles, urls, seeders, sizes) # return if nothing found - if not torrentlist: return False - - # get headphones track count for album, return if not found - - myDB = db.DBConnection() - tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid]) - hptrackcount = len(tracks) - - if not hptrackcount: - logger.info('headphones track info not found, cannot compare to torrent') - return False - - # Return all valid entries, ignored, required words now checked in searcher.py - - #unwantedlist = ['promo', 'vinyl', '[lp]', 'songbook', 'tvrip', 'hdtv', 'dvd'] - formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif'] - deluxelist = ['deluxe', 'edition', 'japanese', 'exclusive'] + # don't bother checking track counts anymore, let searcher filter instead + # leave code in just in case + check_track_count = False + + if check_track_count: + + # get headphones track count for album, return if not found + myDB = db.DBConnection() + tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid]) + hptrackcount = len(tracks) + + if not hptrackcount: + logger.info('headphones track info not found, cannot compare to torrent') + return False + + # Return all valid entries, ignored, required words now checked in searcher.py + + #unwantedlist = ['promo', 'vinyl', '[lp]', 'songbook', 'tvrip', 'hdtv', 'dvd'] + + formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif'] + deluxelist = ['deluxe', 'edition', 'japanese', 'exclusive'] for torrent in torrentlist: @@ -169,105 +166,102 @@ class Rutracker(): url = torrent[1] seeders = torrent[2] size = torrent[3] - - title = returntitle.lower() - - if int(size) <= maxsize and int(seeders) >= minseeders: - - # Check torrent info - - torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t'] - self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)) - - # Debug - #for cookie in self.cookiejar: - # logger.debug ('Cookie: %s' % cookie) - - try: - page = self.opener.open(url) - torrent = page.read() - if torrent: - decoded = bencode.bdecode(torrent) - metainfo = decoded['info'] - page.close () - except Exception, e: - logger.error('Error getting torrent: %s' % e) - return False - - # get torrent track count and check for cue - - trackcount = 0 - cuecount = 0 - - if 'files' in metainfo: # multi - for pathfile in metainfo['files']: - path = pathfile['path'] - for file in path: - if any(file.lower().endswith('.' + x.lower()) for x in formatlist): - trackcount += 1 - if '.cue' in file: - cuecount += 1 - - #Torrent topic page - - topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id - logger.debug ('torrent title: %s' % title) - logger.debug ('headphones trackcount: %s' % hptrackcount) - logger.debug ('rutracker trackcount: %s' % trackcount) - # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s) - # This is for the case where we have a single .flac/.wav which can be split by cue - # Not great, but shouldn't be doing this too often - - totallogcount = 0 - if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount: - page = self.opener.open(topicurl, timeout=60) - soup = BeautifulSoup(page.read()) - findtoc = soup.find_all(text='TOC of the extracted CD') - if not findtoc: - findtoc = soup.find_all(text='TOC извлечённого CD') - for toc in findtoc: - logcount = 0 - for toccontent in toc.find_all_next(text=True): - cut_string = toccontent.split('|') - new_string = cut_string[0].lstrip().rstrip() - if new_string == '1' or new_string == '01': - logcount = 1 - elif logcount > 0: - if new_string.isdigit(): - logcount += 1 - else: - break - totallogcount = totallogcount + logcount - - if totallogcount > 0: - trackcount = totallogcount - logger.debug ('rutracker logtrackcount: %s' % totallogcount) - - # If torrent track count = hp track count then return torrent, - # if greater, check for deluxe/special/foreign editions - # if less, then allow if it's a single track with a cue - - valid = False - - if trackcount == hptrackcount: + if int(size) <= maxsize and int(seeders) >= minseeders: + + #Torrent topic page + torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t'] + topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id + + # add to list + if not check_track_count: valid = True - elif trackcount > hptrackcount: - if any(deluxe in title for deluxe in deluxelist): + else: + + # Check torrent info + self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)) + + # Debug + #for cookie in self.cookiejar: + # logger.debug ('Cookie: %s' % cookie) + + try: + page = self.opener.open(url) + torrent = page.read() + if torrent: + decoded = bencode.bdecode(torrent) + metainfo = decoded['info'] + page.close () + except Exception, e: + logger.error('Error getting torrent: %s' % e) + return False + + # get torrent track count and check for cue + trackcount = 0 + cuecount = 0 + + if 'files' in metainfo: # multi + for pathfile in metainfo['files']: + path = pathfile['path'] + for file in path: + if any(file.lower().endswith('.' + x.lower()) for x in formatlist): + trackcount += 1 + if '.cue' in file: + cuecount += 1 + + title = returntitle.lower() + logger.debug ('torrent title: %s' % title) + logger.debug ('headphones trackcount: %s' % hptrackcount) + logger.debug ('rutracker trackcount: %s' % trackcount) + + # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s) + # This is for the case where we have a single .flac/.wav which can be split by cue + # Not great, but shouldn't be doing this too often + totallogcount = 0 + if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount: + page = self.opener.open(topicurl, timeout=60) + soup = BeautifulSoup(page.read()) + findtoc = soup.find_all(text='TOC of the extracted CD') + if not findtoc: + findtoc = soup.find_all(text='TOC извлечённого CD') + for toc in findtoc: + logcount = 0 + for toccontent in toc.find_all_next(text=True): + cut_string = toccontent.split('|') + new_string = cut_string[0].lstrip().rstrip() + if new_string == '1' or new_string == '01': + logcount = 1 + elif logcount > 0: + if new_string.isdigit(): + logcount += 1 + else: + break + totallogcount = totallogcount + logcount + + if totallogcount > 0: + trackcount = totallogcount + logger.debug ('rutracker logtrackcount: %s' % totallogcount) + + # If torrent track count = hp track count then return torrent, + # if greater, check for deluxe/special/foreign editions + # if less, then allow if it's a single track with a cue + valid = False + + if trackcount == hptrackcount: valid = True + elif trackcount > hptrackcount: + if any(deluxe in title for deluxe in deluxelist): + valid = True # Add to list - if valid: rulist.append((returntitle, size, topicurl)) else: if topicurl: logger.info(u'Torrent found with %s tracks but the selected headphones release has %s tracks, skipping for rutracker.org' % (topicurl, trackcount, hptrackcount)) - else: logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (returntitle, int(size), int(seeders))) - return rulist def get_torrent(self, url, savelocation=None):