From 5291b2f326bc87ff53d13ad4b33e296a5c1d83fb Mon Sep 17 00:00:00 2001 From: Aaron Cohen Date: Thu, 13 Sep 2012 02:07:50 -0700 Subject: [PATCH] Disabled a bunch of stuff that was being parsed and not needed by headphones, got down to 27 secs --- headphones/searcher.py | 5 +- lib/whatapi.py | 1797 +++++----------------------------------- 2 files changed, 191 insertions(+), 1611 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index 757bc4dc..7a4ad4cf 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -849,7 +849,7 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False): all_children = [] for group in release_torrent_groups: - all_children += group.torrentinfo['torrent']['childrenids'] + all_children += group.getTorrentChildren() # cap at 10 matches, 1 per second to reduce hits on API...don't wanna get in trouble. # Might want to turn up number of matches later. # max_torrent_info_reads = 10 @@ -871,7 +871,8 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False): if not len(match_torrents): logger.info(u"No results found from %s for %s after filtering" % (provider, term)) elif len(match_torrents) > 1: - logger.info(u"Found %d matching releases from %s for %s after filtering" % (len(match_torrents), provider, artistterm)) + logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % + (len(match_torrents), provider, artistterm, albumterm)) match_torrents.sort(key=lambda x: x.getTorrentSeeders) for torrent in match_torrents: diff --git a/lib/whatapi.py b/lib/whatapi.py index 66c0d009..ec9a6c50 100755 --- a/lib/whatapi.py +++ b/lib/whatapi.py @@ -26,7 +26,7 @@ __date__ = "$Oct 23, 2010 11:21:12 PM$" import hashlib try: - from BeautifulSoup import BeautifulSoup + from BeautifulSoup import BeautifulSoup, SoupStrainer except: raise ImportError, "Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download" import httplib @@ -441,11 +441,10 @@ class Authenticate(WhatBase): homepage = response.body pickle.dump(self.whatcd.headers, f) except (KeyError, AttributeError): - print "Login failed, most likely bad creds or the site is down, nothing to do" f.close() os.remove('cookie') self.whatcd.headers = None -# quit() + raise Exception("Login failed, most likely bad creds or the site is down, nothing to do") f.close() @@ -1056,9 +1055,10 @@ class Parser(object): userInfo = {} soup = BeautifulSoup(str(dom)) for ul in soup.fetch('ul'): + ul_all_li = ul.findAll('li') if ul["id"] == "userinfo_username": #retrieve user logged id - hrefid = ul.findAll('li')[0].find("a")["href"] + hrefid = ul_all_li[0].find("a")["href"] regid = re.compile('[0-9]+') if regid.search(hrefid) is None: self.debugMessage("not found href to retrieve user id") @@ -1066,1455 +1066,7 @@ class Parser(object): userInfo["id"] = regid.search(hrefid).group(0) #retrieve user logged id - hrefauth = ul.findAll('li')[2].find("a")["href"] - regauth = re.compile('=[0-9a-fA-F]+') - if regid.search(hrefid) is None: - self.debugMessage("not found href to retrieve user id") - else: - userInfo["authcode"] = regauth.search(hrefauth).group(0)[1:] - - elif ul["id"] == "userinfo_stats": - if len(ul.findAll('li')) > 0: - userInfo["uploaded"] = ul.findAll('li')[0].find("span").string - userInfo["downloaded"] = ul.findAll('li')[1].find("span").string - userInfo["ratio"] = ul.findAll('li')[2].findAll("span")[1].string - userInfo["required"] = ul.findAll('li')[3].find("span").string - userInfo["authenticate"] = True - - return userInfo - - def userInfo(self, dom, user): - """ - Parse an user's page and returns a dictionnary with its information - - # Parameters: - * dom str: user page html - * user str: what.cd username - """ - userInfo = {'stats':{}, 'percentile':{}, 'community':{}} - soup = BeautifulSoup(str(dom)) - - for div in soup.fetch('div', {'class':'box'}): - - #if paronoia is not set to 'Off', stop collecting data - if div.findAll('div')[0].string == "Personal": - if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off": - return None - - statscontainer = soup.findAll('div', {'class':'box'})[1] - percentilecontainer = soup.findAll('div', {'class':'box'})[2] - communitycontainer = soup.findAll('div', {'class':'box'})[4] - - - userInfo['stats']['joined'] = statscontainer.findAll('li')[0].find('span')['title'] - userInfo['stats']['lastseen'] = statscontainer.findAll('li')[1].find('span')['title'] - userInfo['stats']['uploaded'] = statscontainer.findAll('li')[2].string[10:] - userInfo['stats']['downloaded'] = statscontainer.findAll('li')[3].string[12:] - userInfo['stats']['ratio'] = statscontainer.findAll('li')[4].find('span').string - userInfo['stats']['rratio'] = statscontainer.findAll('li')[5].string[16:] - userInfo['percentile']['dataup'] = percentilecontainer.findAll('li')[0].string[15:] - userInfo['percentile']['datadown'] = percentilecontainer.findAll('li')[1].string[17:] - userInfo['percentile']['torrentsup'] = percentilecontainer.findAll('li')[2].string[19:] - userInfo['percentile']['reqfilled'] = percentilecontainer.findAll('li')[3].string[17:] - userInfo['percentile']['bountyspent'] = percentilecontainer.findAll('li')[4].string[14:] - userInfo['percentile']['postsmade'] = percentilecontainer.findAll('li')[5].string[12:] - userInfo['percentile']['artistsadded'] = percentilecontainer.findAll('li')[6].string[15:] - userInfo['percentile']['overall'] = percentilecontainer.findAll('li')[7].find('strong').string[14:] - - userInfo['community']['forumposts'] = (communitycontainer.findAll('li')[0].contents[0].string[13:len(communitycontainer.findAll('li')[0].contents[0].string)-2],\ - communitycontainer.findAll('li')[0].find('a')['href']) - userInfo['community']['torrentscomments'] = (communitycontainer.findAll('li')[1].contents[0].string[18:len(communitycontainer.findAll('li')[1].contents[0].string)-2],\ - communitycontainer.findAll('li')[1].find('a')['href']) - userInfo['community']['startedcollages'] = (communitycontainer.findAll('li')[2].contents[0].string[18:len(communitycontainer.findAll('li')[2].contents[0].string)-2],\ - communitycontainer.findAll('li')[2].find('a')['href']) - userInfo['community']['contributedcollages'] = (communitycontainer.findAll('li')[3].contents[0].string[25:len(communitycontainer.findAll('li')[3].contents[0].string)-2],\ - communitycontainer.findAll('li')[3].find('a')['href']) - userInfo['community']['reqfilled'] = (communitycontainer.findAll('li')[4].contents[0].string[17:len(communitycontainer.findAll('li')[4].contents[0].string)-2],\ - communitycontainer.findAll('li')[4].find('a')['href']) - userInfo['community']['reqvoted'] = (communitycontainer.findAll('li')[5].contents[0].string[16:len(communitycontainer.findAll('li')[5].contents[0].string)-2],\ - communitycontainer.findAll('li')[5].find('a')['href']) - userInfo['community']['uploaded'] = (communitycontainer.findAll('li')[6].contents[0].string[10:len(communitycontainer.findAll('li')[6].contents[0].string)-2],\ - communitycontainer.findAll('li')[6].find('a')['href']) - userInfo['community']['uniquegroups'] = (communitycontainer.findAll('li')[7].contents[0].string[15:len(communitycontainer.findAll('li')[7].contents[0].string)-2],\ - communitycontainer.findAll('li')[7].find('a')['href']) - userInfo['community']['pefectflacs'] = (communitycontainer.findAll('li')[8].contents[0].string[16:len(communitycontainer.findAll('li')[8].contents[0].string)-2],\ - communitycontainer.findAll('li')[8].find('a')['href']) - userInfo['community']['seeding'] = (communitycontainer.findAll('li')[9].contents[0].string[9:len(communitycontainer.findAll('li')[9].contents[0].string)-2],\ - communitycontainer.findAll('li')[9].find('a')['href']) - userInfo['community']['leeching'] = (communitycontainer.findAll('li')[10].contents[0].string[10:len(communitycontainer.findAll('li')[10].contents[0].string)-2],\ - communitycontainer.findAll('li')[10].find('a')['href']) - #NB: there's a carriage return and white spaces inside the snatched li tag - userInfo['community']['snatched'] = (communitycontainer.findAll('li')[11].contents[0].string[10:len(communitycontainer.findAll('li')[11].contents[0].string)-7],\ - communitycontainer.findAll('li')[11].find('a')['href']) - userInfo['community']['invited'] = (communitycontainer.findAll('li')[12].contents[0].string[9:],\ - None) - userInfo['community']['artists'] = percentilecontainer.findAll('li')[6]['title'] - - return userInfo - - def torrentInfo(self, dom, id, isparent): - """ - Parse a torrent's page and returns a dictionnary with its information - """ - - torrentInfo = {'torrent':{}} - torrentfiles = [] - torrentdescription = "" - isreported = False - isfreeleech = False - soup = BeautifulSoup(str(dom)) - if isparent: - torrentInfo['torrent']['parentid'] = id - else: - groupidurl = soup.findAll('div', {'class':'linkbox'})[0].find('a')['href'] - torrentInfo['torrent']['editioninfo'] = soup.findAll('td', {'class':'edition_info'})[0].find('strong').contents[-1] - regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray') - torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0) - torrentInfo['torrent']['parentid'] = groupidurl[groupidurl.rfind("=") + 1:] - torrentInfo['torrent']['downloadurl'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a', {'title':'Download'})[0]['href'] - ## is freeleech or/and reported? ## - #both - if len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 4: - isreported = True - isfreeleech = True - torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0] - #either - elif len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 2: - if soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Reported': - isreported = True - elif soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Freeleech!': - isreported = True - torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0] - #none - else: - torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0] - torrentInfo['torrent']['isfreeleech'] = isfreeleech - torrentInfo['torrent']['isreported'] = isreported - torrentInfo['torrent']['size'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[1].string - torrentInfo['torrent']['snatched'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[2].string - torrentInfo['torrent']['seeders'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[3].string - torrentInfo['torrent']['leechers'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[4].string - torrentInfo['torrent']['uploadedby'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('a')[0].string - foldername = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('div')[1].string - if(foldername is None): - torrentInfo['torrent']['foldername'] = None - else: - torrentInfo['torrent']['foldername'] = self.utils.decodeHTMLEntities(foldername) - files = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('tr') - for file in files[1:-1]: - torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string)) - torrentInfo['torrent']['filelist'] = torrentfiles - #is there any description? - if len(soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')) > 1: - description = torrentInfo['torrent']['description'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')[1].contents - info = '' - for content in description: - if content.string: - info = "%s%s" % (info, self.utils._string(content.string)) - torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string)) - torrentInfo['torrent']['torrentdescription'] = torrentdescription - regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown') - torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0) - - torrentInfo['torrent']['comments'] = [] - torrentInfo['torrent']['commentspages'] = 0 - - if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0: - linkbox = dom.findAll("div", {"class": "linkbox"})[-1] - pages = 1 - postid = '' - userid = '' - post = '' - # if there's more than 1 page of torrents - if linkbox.find("a"): - # by default torrent page show last page of comments - lastpage = linkbox.findAll("a")[-1]['href'] - pages = int(lastpage[18:lastpage.find('&')]) + 1 - for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}): - postid = comment.find("a", {"class":"post_id"}).string[1:] - userid = comment.findAll("a")[1]['href'][12:] - username = comment.findAll("a")[1].string - post = comment.find("div", {"id":"content" + postid}) - post = u''.join([post.string for post in post.findAll(text=True)]) - torrentInfo['torrent']['comments'].append({"postid":postid, "post":post, "userid":userid, "username":username}) - - torrentInfo['torrent']['commentspages'] = pages - - return torrentInfo - - def artistInfo(self, dom): - """ - Parse an artist's page and returns a dictionnary with its information - """ - artistInfo = {} - releases = [] - requests = [] - infoartist = "" - tagsartist = [] - similarartists = [] - soup = BeautifulSoup(str(dom)) - for releasetype in soup.fetch('table', {'class':'torrent_table'}): - releasetypenames = releasetype.findAll('strong') - releasetypename = releasetype.findAll('strong')[0].string - for release in releasetypenames[1:-1]: - #skip release edition info and Freeleech! s - if len(release.parent.contents) > 1 and len(release.contents) > 1: - releaseyear = release.contents[0][0:4] - releasename = release.contents[1].string - releasehref = release.contents[1]['href'] - releaseid = releasehref[releasehref.rfind('=') + 1:] - releases.append({'releasetype':releasetypename,\ - 'year': releaseyear, 'name':self.utils.decodeHTMLEntities(releasename), 'id':releaseid}) - - artistInfo['releases'] = releases - #is there an artist image? - artistInfo['image'] = None - if soup.find('div', {'class':'box'}).find('img'): - artistInfo['image'] = soup.find('div', {'class':'box'}).find('img')['src'] - #is there any artist info? - contents = soup.find('div', {'class':'body'}).contents - if len(contents) > 0: - for content in contents: - if content.string: - infoartist = "%s%s" % (infoartist, self.utils._string(content.string)) - artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist) - #is there any artist tags? - if soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li'): - ul = soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li') - for li in ul: - if li.contents[0].string: - tagsartist.append(self.utils._string(li.contents[0].string)) - artistInfo['tags'] = tagsartist - #is there any similar artist? - if soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'}): - artists = soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'}) - for artist in artists: - if artist.contents[0].string: - similarartists.append(self.utils._string(artist.contents[0].string)) - artistInfo['similarartists'] = similarartists - #is there any request? - if soup.find('table', {'id':'requests'}): - for request in soup.find('table', {'id':'requests'}).findAll('tr', {'class':re.compile('row')}): - requests.append({'requestname':request.findAll('a')[1].string, 'id':request.findAll('a')[1]['href'][28:]}) - - artistInfo['requests'] = requests - - return artistInfo - - def torrentsList(self, dom): - """ - Parse a torrent's list page and returns a dictionnary with its information - """ - torrentslist = [] - torrentssoup = dom.find("table", {"width": "100%"}) - pages = 0 - - #if there's at least 1 torrent in the list - if torrentssoup: - navsoup = dom.find("div", {"class": "linkbox"}) - pages = 1 - regyear = re.compile('\[\d{4}\]') - - #is there a page navigation bar? - if navsoup.contents: - #if there's more than 1 page of torrents - if navsoup.contents[-1].has_key('href'): - lastpage = navsoup.contents[-1]['href'] - pages = lastpage[18:lastpage.find('&')] - self.totalpages = pages - else: #we are at the last page, no href - pages = self.totalpages + 1 - #fetch all tr except first one (column head) - for torrent in torrentssoup.fetch('tr')[1:]: - #exclude non music torrents - if torrent.find('td').find('div')['class'][0:10] == 'cats_music': - - torrenttag = torrent.find('td').contents[1]['title'] - torrentdl = torrent.findAll('td')[1].find('span').findAll('a')[0]['href'] - torrentrm = torrent.findAll('td')[1].find('span').findAll('a')[1]['href'] - torrentid = torrentrm[torrentrm.rfind('=') + 1:] - torrenttd = torrent.findAll('td')[1] - - # remove dataless elements - torrenttags = torrenttd.div - rightlinks = torrenttd.span - torrenttags.extract() - rightlinks.extract() - - # remove line breaks - torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")]) - torrenttd = BeautifulSoup(torrenttd) - isScene = False - info = "" - - if len(torrenttd.findAll('a')) == 2: - #one artist - torrentartist = (self.utils.decodeHTMLEntities(torrenttd.find("a").string),) - artistid = (torrenttd.find("a")['href'][14:],) - torrentalbum = torrenttd.findAll("a")[1].string - info = torrenttd.findAll("a")[1].nextSibling.string.strip() - - - elif len(torrenttd.findAll('a')) == 1: - #various artists - torrentartist = ('Various Artists',) - artistid = () - torrentalbum = torrenttd.find("a").string - info = torrenttd.find("a").nextSibling.string.strip() - - elif len(torrenttd.findAll('a')) == 3: - #two artists - torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[0].string),\ - self.utils.decodeHTMLEntities(torrenttd.findAll("a")[1].string)) - artistid = (torrenttd.findAll("a")[0]['href'][14:],\ - torrenttd.findAll("a")[1]['href'][14:]) - torrentalbum = torrenttd.findAll("a")[2].string - info = torrenttd.findAll("a")[2].nextSibling.string.strip() - - elif torrenttd.find(text=re.compile('performed by')): - #performed by - torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[-2].string),) - artistid = (torrenttd.findAll("a")[-2]['href'][14:],) - torrentalbum = torrenttd.findAll("a")[-1].string - info = torrenttd.findAll("a")[-1].nextSibling.string.strip() - - if 'Scene' in info: - isScene = True - - torrentyear = regyear.search(info).group(0)[1:5] - torrentslist.append({'tag':torrenttag,\ - 'dlurl':torrentdl,\ - 'id':torrentid,\ - 'artist':torrentartist,\ - 'artistid':artistid,\ - 'album':self.utils.decodeHTMLEntities(torrentalbum), - 'year':torrentyear, - 'pages':pages, - 'scene':isScene}) - - return torrentslist - - def postsList(self, dom): - """ - Parse a post list page and returns a dictionnary with each post information: - {torrentid, commentid, postid} - """ - postslist = [] - postssoup = dom.find("div", {"class": "thin"}) - pages = 0 - - #if there's at least 1 post in the list - if postssoup: - navsoup = dom.find("div", {"class": "linkbox"}) - - #if there's more than 1 page of torrents - if navsoup.find("a"): - lastpage = navsoup.findAll("a")[1]['href'] - pages = lastpage[18:lastpage.find('&')] - self.totalpages = pages - else: #we are at the last page, no link - pages = 1 - - for post in postssoup.fetch('table', {'class':'forum_post box vertical_margin'}): - commentbody = post.find("td", {"class":"body"}) - postid = post.find("span").findAll("a")[0].string[1:] - torrentid = post.find("span").findAll("a")[-1]['href'][post.find("span").findAll("a")[-1]['href'].rfind('=') + 1:] - comment = u''.join([commentbody.string for commentbody in commentbody.findAll(text=True)]) - postdate = post.find("span", {"class":"time"})['title'] - postslist.append({'postid':postid,\ - 'torrentid':torrentid,\ - 'comment':comment,\ - 'postdate':postdate,\ - 'pages':pages}) - - - return postslist - - - def whatForm(self, dom, action): - """ - Parse a what.cd edit page and returns a dict with all form inputs/textareas names and values - # Parameters: - * dom str: the edit page dom. - + action str: the action value from the requested form - """ - inputs = {} - - form = dom.find('input', {'name':'action', 'value':action}).parent - elements = form.fetch(('input', 'textarea')) - #get all form elements except for submit input - for element in elements[0:3]: - name = element.get('name', None) - if element.name == 'textarea': - inputs[name] = element.string - else: - inputs[name] = element.get('value', None) - return inputs - - - -if __name__ == "__main__": - print "Module to manage what.cd as a web service" -# -*- coding: utf_8 -*- -################################################################################# -# -# Name: whatapi.py -# -# Synopsis: Module to manage what.cd as a web service -# -# Description: See below list of the implemented webservices -# -# Copyright 2010 devilcius -# -# The Wide Open License (WOL) -# -# Permission to use, copy, modify, distribute and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice and this license appear in all source copies. -# THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF -# ANY KIND. See http://www.dspguru.com/wide-open-license for more information. -# -################################################################################# - - -__author__ = "devilcius" -__date__ = "$Oct 23, 2010 11:21:12 PM$" - - -import hashlib -try: - from BeautifulSoup import BeautifulSoup -except: - raise ImportError, "Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download" -import httplib -import os -import pickle -import re -import urllib -import shelve -import tempfile -from htmlentitydefs import name2codepoint as n2cp - - -""" -A list of the implemented webservices (from what.cd ) -===================================== - -# User - - * user.getUserId - * user.getInfo - - * user.getTorrentsSeeding - * user.getTorrentsSnatched - * user.getTorrentsUploaded - * user.getTorrentsCommented - - * user.specificUserInfo - Atributes: - ######## stats ########### - -joindate - -lastseen - -dataup - -datadown - -ratio - -rratio - ######## percentile ########### - -uppercentile - -downpercentile - -torrentsuppercentile - -reqfilledpercentile - -bountyspentpercentile - -postsmadepercentile - -artistsaddedpercentile - -overallpercentile - ######## community ########### - -postsmade - -torrentscomments - -collagesstarted - -collagescontr - -reqfilled - -reqvoted - -uploaded - -unique - -perfect - -seeding - -leeching - -snatched - -invited - -artistsadded - - -# Artist - - * artist.getArtistReleases - * artist.getArtistImage - * artist.getArtistInfo - * artist.getArtistTags - * artist.getArtistSimilar - * artist.getArtistRequests - - + artist.setArtistInfo - - -# Torrent - - * torrent.getTorrentParentId - * torrent.getTorrentDownloadURL - * torrent.getTorrentDetails - * torrent.getTorrentSize - * torrent.getTorrentSnatched - * torrent.getTorrentSeeders - * torrent.getTorrentLeechers - * torrent.getTorrentUploadedBy - * torrent.getTorrentFolderName - * torrent.getTorrentFileList - * torrent.getTorrentDescription - * torrent.getTorrentComments - * torrent.isTorrentFreeLeech - * torrent.isTorrentReported - - -# Authenticate - - * authenticate.getAuthenticatedUserId - * authenticate.getAuthenticatedUserAuthCode - * authenticate.getAuthenticatedUserDownload - * authenticate.getAuthenticatedUserUpload() - * authenticate.getAuthenticatedUserRatio - * authenticate.getAuthenticatedUserRequiredRatio - -""" - -class ResponseBody: - """A Response Body Object""" - pass - -class SpecificInformation: - """A Specific Information Object""" - pass - - -class WhatBase(object): - """An abstract webservices object.""" - whatcd = None - - def __init__(self, whatcd): - self.whatcd = whatcd - #if we are not autenticated in what.cd, do it now - if not self.whatcd.isAuthenticated(): - print "authenticating..." - self.whatcd.headers = Authenticate(self.whatcd).getAuthenticatedHeader() - - def _request(self, type, path, data, headers): - return Request(self.whatcd, type, path, data, headers) - - def _parser(self): - return Parser(self.whatcd) - - def utils(self): - return Utils() - - -class Utils(): - - def md5(self, text): - """Returns the md5 hash of a string.""" - - h = hashlib.md5() - h.update(self._string(text)) - - return h.hexdigest() - - def _unicode(self, text): - if type(text) == unicode: - return text - - if type(text) == int: - return unicode(text) - - return unicode(text, "utf-8") - - def _string(self, text): - if type(text) == str: - return text - - if type(text) == int: - return str(text) - - return text.encode("utf-8") - - def _number(self, string): - """ - Extracts an int from a string. Returns a 0 if None or an empty string was passed - """ - - if not string: - return 0 - elif string == "": - return 0 - else: - try: - return int(string) - except ValueError: - return float(string) - - def substituteEntity(self, match): - ent = match.group(2) - if match.group(1) == "#": - return unichr(int(ent)) - else: - cp = n2cp.get(ent) - - if cp: - return unichr(cp) - else: - return match.group() - - def decodeHTMLEntities(self, string): - entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") - return entity_re.subn(self.substituteEntity, string)[0] - - - -class WhatCD(object): - - def __init__(self, username, password, site, loginpage, headers): - - #credentials - self.username = username - self.password = password - self.site = site - self.loginpage = loginpage - self.headers = headers - self.authenticateduserinfo = {} - - self.cache_backend = None - self.proxy_enabled = False - self.proxy = None - - def isAuthenticated(self): - """ - Checks if we are authenticated in what.cd - """ - if "id" in self.authenticateduserinfo: - return True - else: - return False - - def getCredentials(self): - """ - Returns an authenticated user credentials object - """ - return Authenticate(self) - - - def getUser(self, username): - """ - Returns an user object - """ - return User(username, self) - - def getTorrent(self, id, page=1): - """ - Returns a torrent object - """ - return Torrent(id, page, None, self) - - def getTorrentGroup(self, id, page=1): - """ - Returns a torrent object - """ - return Torrent(id, page, True, self) - - def getArtist(self, name): - """ - Returns an artist object - """ - return Artist(name, self) - - def enableProxy(self, host, port): - """Enable a default web proxy""" - self.proxy = [host, Utils()._number(port)] - self.proxy_enabled = True - - def disableProxy(self): - """Disable using the web proxy""" - self.proxy_enabled = False - - def isProxyEnabled(self): - """Returns True if a web proxy is enabled.""" - return self.proxy_enabled - - def getProxy(self): - """Returns proxy details.""" - return self.proxy - - def enableCaching(self, file_path=None): - """Enables caching request-wide for all cachable calls. - * file_path: A file path for the backend storage file. If - None set, a temp file would probably be created, according the backend. - """ - if not file_path: - file_path = tempfile.mktemp(prefix="whatapi_tmp_") - - self.cache_backend = _ShelfCacheBackend(file_path) - - def disableCaching(self): - """Disables all caching features.""" - self.cache_backend = None - - def isCachingEnabled(self): - """Returns True if caching is enabled.""" - - return not (self.cache_backend == None) - - def getCacheBackend(self): - - return self.cache_backend - -def getWhatcdNetwork(username="", password=""): - """ - Returns a preconfigured WhatCD object for what.cd - # Parameters: - * username str: a username of a valid what.cd user - * password str: user's password - """ - - return WhatCD ( - username=username, - password=password, - site="ssl.what.cd", - loginpage="/login.php", - headers={ - "Content-type": "application/x-www-form-urlencoded", - 'Accept-Charset': 'utf-8', - 'User-Agent': "whatapi [devilcius]" - }) - - - -class _ShelfCacheBackend(object): - """Used as a backend for caching cacheable requests.""" - def __init__(self, file_path=None): - self.shelf = shelve.open(file_path) - - def getHTML(self, key): - return self.shelf[key] - - def setHTML(self, key, xml_string): - self.shelf[key] = xml_string - - def hasKey(self, key): - return key in self.shelf.keys() - - -class Request(object): - """web service operation.""" - - def __init__(self, whatcd, type, path, data, headers): - - self.whatcd = whatcd - self.utils = Utils() - self.type = type - self.path = path - self.data = data - self.headers = headers - #enable catching? - if whatcd.isCachingEnabled(): - self.cache = whatcd.getCacheBackend() - - def getCacheKey(self): - """The cache key is a md5 hash of request params.""" - - key = self.type + self.path + self.data - return Utils().md5(key) - - def getCachedResponse(self): - """Returns a file object of the cached response.""" - - if not self.isCached(): - response = self.downloadResponse() - self.cache.setHTML(self.getCacheKey(), response) - return self.cache.getHTML(self.getCacheKey()) - - def isCached(self): - """Returns True if the request is already in cache.""" - - return self.cache.hasKey(self.getCacheKey()) - - def downloadResponse(self): - """Returns a ResponseBody object from the server.""" - - #print "downloading from %s" % (self.path) - conn = httplib.HTTPSConnection(self.whatcd.site) - rb = ResponseBody() - - if self.whatcd.isProxyEnabled(): - conn = httplib.HTTPSConnection(host=self.whatcd.getProxy()[0], port=self.whatcd.getProxy()[1]) - conn.request(method=self.type, url="https://" + self.whatcd.site + self.path, body=self.data, headers=self.headers) - else: - conn.request(self.type, self.path, self.data, self.headers) - - response = conn.getresponse() - rb.headers = response.getheaders() - # Rip all inline JavaScript out of the response in case it hasn't been properly escaped - rb.body = re.sub('', '', response.read()) - conn.close() - return rb - - def execute(self, cacheable=False): - """Depending if caching is enabled, returns response from the server or, if available, the cached response""" - if self.whatcd.isCachingEnabled() and cacheable: - response = self.getCachedResponse() - else: - response = self.downloadResponse() - - return response - -class Authenticate(WhatBase): - - def __init__(self, whatcd): - """Create an authenticated user object. - # Parameters: - * whatcd object: WhatCD object. - """ - self.whatcd = whatcd - self.parser = Parser(whatcd) - if not self.whatcd.isAuthenticated(): - self.getAuthenticatedHeader() - - def setCookie(self): - print "creating cookie" - f = open('cookie', 'w') - loginform = {'username': self.whatcd.username, 'password': self.whatcd.password\ - , 'keeplogged': '1', 'login': 'Login'} - data = urllib.urlencode(loginform) - response = self._request("POST", self.whatcd.loginpage, data, self.whatcd.headers).execute(True) - try: - cookie = dict(response.headers)['set-cookie'] - session = re.search("session=[^;]+", cookie).group(0) - self.whatcd.headers["Cookie"] = session - homepage = response.body - pickle.dump(self.whatcd.headers, f) - except (KeyError, AttributeError): - print "Login failed, most likely bad creds or the site is down, nothing to do" - f.close() - os.remove('cookie') - self.whatcd.headers = None - quit() - f.close() - - - def getAuthenticatedHeader(self): - """ - Log user in what.cd and returns the authenticated header - """ - homepage = None - if os.path.exists("cookie"): - f = open("cookie", "r") - try: - self.whatcd.headers = pickle.load(f) - except EOFError: - f.close() - os.remove("cookie") - print "invalid cookie, removed" - self.setCookie() - else: - self.setCookie() - #set authenticated user info - if 'id' not in self.whatcd.authenticateduserinfo: - self.whatcd.authenticateduserinfo = self.getAuthenticatedUserInfo(homepage) - - return self.whatcd.headers - - def getAuthenticatedUserInfo(self, homepage=None): - """ - Returns authenticated user's info - """ - if not homepage: - homepage = BeautifulSoup(self._request("GET", "/index.php", "", self.whatcd.headers).execute(True).body) - authuserinfo = self._parser().authenticatedUserInfo(homepage.find("div", {"id": "userinfo"})) - return authuserinfo - - def getAuthenticatedUserId(self): - """ - Returns authenticated user's id - """ - return self.whatcd.authenticateduserinfo["id"] - - def getAuthenticatedUserAuthCode(self): - """ - Returns authenticated user's authcode - """ - return self.whatcd.authenticateduserinfo["authcode"] - - - def getAuthenticatedUserUpload(self): - """ - Returns authenticated user's total uploaded data - """ - return self.whatcd.authenticateduserinfo["uploaded"] - - - def getAuthenticatedUserDownload(self): - """ - Returns authenticated user's total downloaded data - """ - return self.whatcd.authenticateduserinfo["downloaded"] - - - def getAuthenticatedUserRatio(self): - """ - Returns authenticated user's ratio - """ - return self.whatcd.authenticateduserinfo["ratio"] - - def getAuthenticatedUserRequiredRatio(self): - """ - Returns authenticated user's required ratio - """ - return self.whatcd.authenticateduserinfo["required"] - - -class User(WhatBase): - """A What.CD user""" - - def __init__(self, username, whatcd): - """Create an user object. - # Parameters: - * username str: The user's name. - - whatcd object: the what.cd network object - """ - WhatBase.__init__(self, whatcd) - self.name = username - self.whatcd = whatcd - self.userpage = "/user.php?" - self.userid = None - self.userinfo = None - - def getUserName(self): - """ - Returns user's name - """ - return self.username - - def getUserId(self): - """ - Returns user's id, None if user doesn't exists - """ - if self.userid: - return self.userid - else: - idform = {'action': "search", 'search': self.name} - data = urllib.urlencode(idform) - headers = self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).headers - if dict(headers) is None: - return None - else: - self.userid = dict(headers)['location'][12:] - return self.userid - - def getInfo(self): - """ - Returns a dictionary of {percentile:{dataup str, - datadown str, - overall str, - postmade str, - boutyspent str, - reqfilled str, - artistsadded str, - torrentsup str}, - stats: {uploaded str, - ratio str, - joined str, - downloaded str, - lastseen str, - rratio str}, - community: {uploaded tuple(total str, url str), - forumposts tuple(total str, url str), - invited tuple (total,None), - perfectflacs tuple(total str, url str), - contributedcollages tuple(total str, url str), - reqvoted tuple(total str, url str), - uniquegroups tuple(total str, url str) - torrentscomments tuple(total str, url str), - snatched tuple(total str, url str), - artists str, - reqfilled tuple(total str, url str), - startedcollages tuple(total str, url str), - leeching tuple(total str, url str), - seeding tuple(total str, url str)} - } - If paranoia is not Off, it returns None. - """ - if self.getUserId(): - form = {'id': self.getUserId()} - data = urllib.urlencode(form) - userpage = BeautifulSoup(self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).body) - info = self._parser().userInfo(userpage.find("div", {"class": "sidebar"}), self.name) - self.userinfo = info - return info - else: - print "no user id retrieved" - return None - - - def getTorrentsSeeding(self, page=1): - """ - Returns a list with all user's uploaded music torrents - in form of dictionary {page(tuple with current and total),tag, dlurl, id, - artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, - album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} - """ - if self.userid is None: - self.userid = self.getUserId() - url = "/torrents.php?type=seeding&userid=%s&page=%d" % (self.userid, page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().torrentsList(torrentspage) - - def getTorrentsSnatched(self, page=1): - """ - Returns a list with all user's uploaded music torrents - in form of dictionary {page(tuple with current and total),tag, dlurl, id, - artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, - album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} - """ - if self.userid is None: - self.userid = self.getUserId() - url = "/torrents.php?type=snatched&userid=%s&page=%d" % (self.userid, page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().torrentsList(torrentspage) - - def getTorrentsUploaded(self, page=1): - """ - Returns a list with all user's uploaded music torrents - in form of dictionary {page(tuple with current and total),tag, dlurl, id, - artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, - album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} - """ - if self.userid is None: - self.userid = self.getUserId() - url = "/torrents.php?type=uploaded&userid=%s&page=%d" % (self.userid, page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().torrentsList(torrentspage) - - - def getTorrentsCommented(self, page=1): - """ - Returns a list with all user's commented torrents - in form of dictionary {postid, torrentid, comment,postdate, pages} - - """ - if self.userid is None: - self.userid = self.getUserId() - - url = "/%s&page=%d" % (self.specificUserInfo().torrentscomments[1], page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().postsList(torrentspage) - - - - ############################################### - # specific values # - ############################################### - - - def specificUserInfo(self): - """ - Returns specific attributes of user info. None if user's paranoia is on - """ - info = SpecificInformation() - # Initialize attributes - info.joindate, info.lastseen, info.dataup, info.datadown,\ - info.ratio, info.rratio, info.uppercentile, info.downpercentile,\ - info.torrentsuppercentile, info.reqfilledpercentile, info.bountyspentpercentile,\ - info.postsmadepercentile, info.artistsaddedpercentile, info.overallpercentile,\ - info.postsmadecom, info.torrentscommentscom, info.collagesstartedcom, info.collagescontrcon,\ - info.reqfilledcom, info.reqvotedcom, info.uploadedcom, info.uniquecom, info.perfectcom,\ - info.seedingcom, info.leechingcom, info.snatchedcom, info.invitedcom, info.artistsaddedcom\ - = (None, None, None, None, None, None, None, None, None, None, None, None, None, None,\ - None, None, None, None, None, None, None, None, None, None, None, None, None, None) - - - if not self.userinfo and self.getInfo() is None: - pass - else: - ######## stats ########### - info.joindate = self.userinfo['stats']['joined'] - info.lastseen = self.userinfo['stats']['lastseen'] - info.dataup = self.userinfo['stats']['uploaded'] - info.datadown = self.userinfo['stats']['downloaded'] - info.ratio = self.userinfo['stats']['ratio'] - info.rratio = self.userinfo['stats']['rratio'] - ######## percentile ########### - info.uppercentile = self.userinfo['percentile']['dataup'] - info.downpercentile = self.userinfo['percentile']['datadown'] - info.torrentsuppercentile = self.userinfo['percentile']['torrentsup'] - info.reqfilledpercentile = self.userinfo['percentile']['reqfilled'] - info.bountyspentpercentile = self.userinfo['percentile']['bountyspent'] - info.postsmadepercentile = self.userinfo['percentile']['postsmade'] - info.artistsaddedpercentile = self.userinfo['percentile']['artistsadded'] - info.overallpercentile = self.userinfo['percentile']['overall'] - ######## community ########### - info.postsmadecom = self.userinfo['community']['forumposts'] - info.torrentscomments = self.userinfo['community']['torrentscomments'] - info.collagesstartedcom = self.userinfo['community']['startedcollages'] - info.collagescontrcon = self.userinfo['community']['contributedcollages'] - info.reqfilledcom = self.userinfo['community']['reqfilled'] - info.reqvotedcom = self.userinfo['community']['reqvoted'] - info.uploadedcom = self.userinfo['community']['uploaded'] - info.uniquecom = self.userinfo['community']['uniquegroups'] - info.perfectcom = self.userinfo['community']['pefectflacs'] - info.seedingcom = self.userinfo['community']['seeding'] - info.leechingcom = self.userinfo['community']['leeching'] - info.snatchedcom = self.userinfo['community']['snatched'] - info.invitedcom = self.userinfo['community']['invited'][0] - info.artistsaddedcom = self.userinfo['community']['artists'] - - - - return info - - -class Torrent(WhatBase): - """A What.CD torrent""" - - def __init__(self, id, page, isparent, whatcd): - """Create a torrent object. - # Parameters: - * id str: The torrent's id. - * whatcd object: the WhatCD network object - * page: The torrent page's number [optional] - """ - WhatBase.__init__(self, whatcd) - self.id = id - self.page = page - self.whatcd = whatcd - self.isParent = isparent - self.torrentpage = "/torrents.php?" - self.torrentinfo = self.getInfo() - - - def getTorrentUrl(self): - """ - Returns torrent's URL - """ - if self.isParent: - form = {'id': self.id, 'page':self.page} - data = urllib.urlencode(form) - return self.torrentpage + data - else: - form = {'torrentid': self.id, 'page':self.page} - data = urllib.urlencode(form) - headers = self._request("GET", self.torrentpage + data, "", self.whatcd.headers).execute(True).headers - - if dict(headers) is None: - return None - else: - if 'location' not in dict(headers).keys(): - return None - else: - return dict(headers)['location'] - - - def getInfo(self): - """ - Returns a dictionnary with torrents's info - """ - if self.getTorrentUrl() is None: - print "no torrent retrieved with such id" - return None - - torrentpage = BeautifulSoup(self._request("GET", "/" + self.getTorrentUrl(), "", self.whatcd.headers).execute(True).body) - - if 'Site log' in torrentpage.find("title").string: - print "no torrent retrieved with such id" - return None - else: - return self._parser().torrentInfo(torrentpage, self.id, self.isParent) - - - def getTorrentParentId(self): - """ - Returns torrent's group id - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['parentid'] - - def getTorrentDownloadURL(self): - """ - Returns relative url to download the torrent - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['downloadurl'] - - def getTorrentDetails(self): - """ - Returns torrent's details (format / bitrate) - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['details'] - - def getTorrentEditionInfo(self): - """ - Returns torrent's edition info (Edition information / media type) - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['editioninfo'] - - def getTorrentMediaType(self): - """ - Returns torrent's media type - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['rlsmedia'] - - def getTorrentSize(self): - """ - Returns torrent's size - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['size'] - - - def getTorrentSnatched(self): - """ - Returns torrent's total snatches - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['snatched'] - - - def getTorrentSeeders(self): - """ - Returns torrent's current seeders - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['seeders'] - - def getTorrentLeechers(self): - """ - Returns torrent's current leechers - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['leechers'] - - def getTorrentUploadedBy(self): - """ - Returns torrent's uploader - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['uploadedby'] - - def getTorrentFolderName(self): - """ - Returns torrent's folder name - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['foldername'] - - def getTorrentFileList(self): - """ - Returns torrent's file list - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['filelist'] - - - def getTorrentReleaseType(self): - """ - Returns torrent's release type - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['rlstype'] - - def getTorrentDescription(self): - """ - Returns torrent's description / empty string is there's none - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['torrentdescription'] - - def getTorrentComments(self): - """ - Returns a list of dictionnaries with each comment in the torrent page - {postid,post,userid,username} - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['comments'] - - def getTorrentCommentsPagesNumber(self): - """ - Returns number of pages of comments in the torrent - """ - if self.torrentinfo: - return self.torrentInfo['torrent']['commentspages'] - - def isTorrentFreeLeech(self): - """ - Returns True if torrent is freeleeech, False if not - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['isfreeleech'] - - def isTorrentReported(self): - """ - Returns True if torrent is reported, False if not - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['isreported'] - - -class Artist(WhatBase): - """A What.CD artist""" - - def __init__(self, name, whatcd): - """Create an artist object. - # Parameters: - * name str: The artist's name. - * whatcd object: The WhatCD network object - """ - WhatBase.__init__(self, whatcd) - self.name = name - self.whatcd = whatcd - self.artistpage = "/artist.php" - self.utils = Utils() - self.info = self.getInfo() - - - def getArtistName(self): - """ - Returns artist's name - """ - return self.name - - def getArtistId(self): - """ - Returns artist's id, None if artist's not found - """ - form = {'artistname': self.name} - data = urllib.urlencode(form) - headers = self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).headers - if dict(headers)['location'][0:14] != 'artist.php?id=': - return None - else: - return dict(headers)['location'][14:] - - def getInfo(self): - """ - Returns artist's info, None if there isn't - """ - if self.getArtistId(): - form = {'id': self.getArtistId()} - data = urllib.urlencode(form) - artistpage = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body) - return self._parser().artistInfo(artistpage) - else: - print "no artist info retrieved" - return None - - def getArtistReleases(self): - """ - Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id} - """ - return self.info['releases'] - - def getArtistImage(self): - """ - Return the artist image URL, None if there's no image - """ - return self.info['image'] - - def getArtistInfo(self): - """ - Return the artist's info, blank string if none - """ - return self.info['info'] - - def getArtistTags(self): - """ - Return a list with artist's tags - """ - return self.info['tags'] - - def getArtistSimilar(self): - """ - Return a list with artist's similar artists - """ - return self.info['similarartists'] - - def getArtistRequests(self): - """ - Returns a list with all artist's requests in form of dictionary {requestname, id} - """ - return self.info['requests'] - - def setArtistInfo(self, id, info): - """ - Updates what.cd artist's info and image - Returns 1 if artist info updated succesfully, 0 if not. - # Parameters: - * id str: what.cd artist's id - * info tuple: (The artist's info -str-, image url -str- (None if there isn't)) - """ - if info[0]: - params = {'action': 'edit', 'artistid':id} - data = urllib.urlencode(params) - - edit_page = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body) - what_form = self._parser().whatForm(edit_page, 'edit') - if info[1]: - image_to_post = info[1] - else: - image_to_post = what_form['image'] - data_to_post = {'body': info[0].encode('utf-8'), - 'summary':'automated artist info insertion',\ - 'image':image_to_post,\ - 'artistid':what_form['artistid'],\ - 'auth':what_form['auth'],\ - 'action':what_form['action']} - - #post artist's info - self.whatcd.headers['Content-type'] = "application/x-www-form-urlencoded" - response = self._request("POST", self.artistpage, urllib.urlencode(data_to_post), self.whatcd.headers).execute(False) - artist_id_returned = dict(response.headers)['location'][14:] - - if str(artist_id_returned) == str(what_form['artistid']): - return 1 - else: - return 0 - - else: - return 'no artist info provided. Aborting.' - exit() - - -class Parser(object): - - def __init__(self, whatcd): - self.utils = Utils() - self.whatcd = whatcd - self.totalpages = 0 - - def authenticatedUserInfo(self, dom): - """ - Parse the index page and returns a dictionnary with basic authenticated user information - """ - userInfo = {} - soup = BeautifulSoup(str(dom)) - for ul in soup.fetch('ul'): - if ul["id"] == "userinfo_username": - #retrieve user logged id - hrefid = ul.findAll('li')[0].find("a")["href"] - regid = re.compile('[0-9]+') - if regid.search(hrefid) is None: - self.debugMessage("not found href to retrieve user id") - else: - userInfo["id"] = regid.search(hrefid).group(0) - - #retrieve user logged id - hrefauth = ul.findAll('li')[2].find("a")["href"] + hrefauth = ul_all_li[2].find("a")["href"] regauth = re.compile('=[0-9a-zA-Z]+') if regid.search(hrefid) is None: self.debugMessage("not found href to retrieve user id") @@ -2522,11 +1074,11 @@ class Parser(object): userInfo["authcode"] = regauth.search(hrefauth).group(0)[1:] elif ul["id"] == "userinfo_stats": - if len(ul.findAll('li')) > 0: - userInfo["uploaded"] = ul.findAll('li')[0].find("span").string - userInfo["downloaded"] = ul.findAll('li')[1].find("span").string - userInfo["ratio"] = ul.findAll('li')[2].findAll("span")[1].string - userInfo["required"] = ul.findAll('li')[3].find("span").string + if len(ul_all_li) > 0: + userInfo["uploaded"] = ul_all_li[0].find("span").string + userInfo["downloaded"] = ul_all_li[1].find("span").string + userInfo["ratio"] = ul_all_li[2].findAll("span")[1].string + userInfo["required"] = ul_all_li[3].find("span").string userInfo["authenticate"] = True return userInfo @@ -2549,54 +1101,58 @@ class Parser(object): if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off": return None - statscontainer = soup.findAll('div', {'class':'box'})[1] - percentilecontainer = soup.findAll('div', {'class':'box'})[2] - communitycontainer = soup.findAll('div', {'class':'box'})[4] + all_div_box = soup.findAll('div', {'class': 'box'}) + statscontainer = all_div_box[1] + percentilecontainer = all_div_box[2] + communitycontainer = all_div_box[4] + statscontainer_all_li = statscontainer.findAll('li') + userInfo['stats']['joined'] = statscontainer_all_li[0].find('span')['title'] + userInfo['stats']['lastseen'] = statscontainer_all_li[1].find('span')['title'] + userInfo['stats']['uploaded'] = statscontainer_all_li[2].string[10:] + userInfo['stats']['downloaded'] = statscontainer_all_li[3].string[12:] + userInfo['stats']['ratio'] = statscontainer_all_li[4].find('span').string + userInfo['stats']['rratio'] = statscontainer_all_li[5].string[16:] - userInfo['stats']['joined'] = statscontainer.findAll('li')[0].find('span')['title'] - userInfo['stats']['lastseen'] = statscontainer.findAll('li')[1].find('span')['title'] - userInfo['stats']['uploaded'] = statscontainer.findAll('li')[2].string[10:] - userInfo['stats']['downloaded'] = statscontainer.findAll('li')[3].string[12:] - userInfo['stats']['ratio'] = statscontainer.findAll('li')[4].find('span').string - userInfo['stats']['rratio'] = statscontainer.findAll('li')[5].string[16:] - userInfo['percentile']['dataup'] = percentilecontainer.findAll('li')[0].string[15:] - userInfo['percentile']['datadown'] = percentilecontainer.findAll('li')[1].string[17:] - userInfo['percentile']['torrentsup'] = percentilecontainer.findAll('li')[2].string[19:] - userInfo['percentile']['reqfilled'] = percentilecontainer.findAll('li')[3].string[17:] - userInfo['percentile']['bountyspent'] = percentilecontainer.findAll('li')[4].string[14:] - userInfo['percentile']['postsmade'] = percentilecontainer.findAll('li')[5].string[12:] - userInfo['percentile']['artistsadded'] = percentilecontainer.findAll('li')[6].string[15:] - userInfo['percentile']['overall'] = percentilecontainer.findAll('li')[7].find('strong').string[14:] +# percentilecontainer_all_li = percentilecontainer.findAll('li') +# userInfo['percentile']['dataup'] = percentilecontainer_all_li[0].string[15:] +# userInfo['percentile']['datadown'] = percentilecontainer_all_li[1].string[17:] +# userInfo['percentile']['torrentsup'] = percentilecontainer_all_li[2].string[19:] +# userInfo['percentile']['reqfilled'] = percentilecontainer_all_li[3].string[17:] +# userInfo['percentile']['bountyspent'] = percentilecontainer_all_li[4].string[14:] +# userInfo['percentile']['postsmade'] = percentilecontainer_all_li[5].string[12:] +# userInfo['percentile']['artistsadded'] = percentilecontainer_all_li[6].string[15:] +# userInfo['percentile']['overall'] = percentilecontainer_all_li[7].find('strong').string[14:] - userInfo['community']['forumposts'] = (communitycontainer.findAll('li')[0].contents[0].string[13:len(communitycontainer.findAll('li')[0].contents[0].string)-2],\ - communitycontainer.findAll('li')[0].find('a')['href']) - userInfo['community']['torrentscomments'] = (communitycontainer.findAll('li')[1].contents[0].string[18:len(communitycontainer.findAll('li')[1].contents[0].string)-2],\ - communitycontainer.findAll('li')[1].find('a')['href']) - userInfo['community']['startedcollages'] = (communitycontainer.findAll('li')[2].contents[0].string[18:len(communitycontainer.findAll('li')[2].contents[0].string)-2],\ - communitycontainer.findAll('li')[2].find('a')['href']) - userInfo['community']['contributedcollages'] = (communitycontainer.findAll('li')[3].contents[0].string[25:len(communitycontainer.findAll('li')[3].contents[0].string)-2],\ - communitycontainer.findAll('li')[3].find('a')['href']) - userInfo['community']['reqfilled'] = (communitycontainer.findAll('li')[4].contents[0].string[17:len(communitycontainer.findAll('li')[4].contents[0].string)-2],\ - communitycontainer.findAll('li')[4].find('a')['href']) - userInfo['community']['reqvoted'] = (communitycontainer.findAll('li')[5].contents[0].string[16:len(communitycontainer.findAll('li')[5].contents[0].string)-2],\ - communitycontainer.findAll('li')[5].find('a')['href']) - userInfo['community']['uploaded'] = (communitycontainer.findAll('li')[6].contents[0].string[10:len(communitycontainer.findAll('li')[6].contents[0].string)-2],\ - communitycontainer.findAll('li')[6].find('a')['href']) - userInfo['community']['uniquegroups'] = (communitycontainer.findAll('li')[7].contents[0].string[15:len(communitycontainer.findAll('li')[7].contents[0].string)-2],\ - communitycontainer.findAll('li')[7].find('a')['href']) - userInfo['community']['pefectflacs'] = (communitycontainer.findAll('li')[8].contents[0].string[16:len(communitycontainer.findAll('li')[8].contents[0].string)-2],\ - communitycontainer.findAll('li')[8].find('a')['href']) - userInfo['community']['seeding'] = (communitycontainer.findAll('li')[9].contents[0].string[9:len(communitycontainer.findAll('li')[9].contents[0].string)-2],\ - communitycontainer.findAll('li')[9].find('a')['href']) - userInfo['community']['leeching'] = (communitycontainer.findAll('li')[10].contents[0].string[10:len(communitycontainer.findAll('li')[10].contents[0].string)-2],\ - communitycontainer.findAll('li')[10].find('a')['href']) - #NB: there's a carriage return and white spaces inside the snatched li tag - userInfo['community']['snatched'] = (communitycontainer.findAll('li')[11].contents[0].string[10:len(communitycontainer.findAll('li')[11].contents[0].string)-7],\ - communitycontainer.findAll('li')[11].find('a')['href']) - userInfo['community']['invited'] = (communitycontainer.findAll('li')[12].contents[0].string[9:],\ - None) - userInfo['community']['artists'] = percentilecontainer.findAll('li')[6]['title'] +# communitycontainer_all_li = communitycontainer.findAll('li') +# userInfo['community']['forumposts'] = (communitycontainer_all_li[0].contents[0].string[13:len(communitycontainer_all_li[0].contents[0].string)-2],\ +# communitycontainer_all_li[0].find('a')['href']) +# userInfo['community']['torrentscomments'] = (communitycontainer_all_li[1].contents[0].string[18:len(communitycontainer_all_li[1].contents[0].string)-2],\ +# communitycontainer_all_li[1].find('a')['href']) +# userInfo['community']['startedcollages'] = (communitycontainer_all_li[2].contents[0].string[18:len(communitycontainer_all_li[2].contents[0].string)-2],\ +# communitycontainer_all_li[2].find('a')['href']) +# userInfo['community']['contributedcollages'] = (communitycontainer_all_li[3].contents[0].string[25:len(communitycontainer_all_li[3].contents[0].string)-2],\ +# communitycontainer_all_li[3].find('a')['href']) +# userInfo['community']['reqfilled'] = (communitycontainer_all_li[4].contents[0].string[17:len(communitycontainer_all_li[4].contents[0].string)-2],\ +# communitycontainer_all_li[4].find('a')['href']) +# userInfo['community']['reqvoted'] = (communitycontainer_all_li[5].contents[0].string[16:len(communitycontainer_all_li[5].contents[0].string)-2],\ +# communitycontainer_all_li[5].find('a')['href']) +# userInfo['community']['uploaded'] = (communitycontainer_all_li[6].contents[0].string[10:len(communitycontainer_all_li[6].contents[0].string)-2],\ +# communitycontainer_all_li[6].find('a')['href']) +# userInfo['community']['uniquegroups'] = (communitycontainer_all_li[7].contents[0].string[15:len(communitycontainer_all_li[7].contents[0].string)-2],\ +# communitycontainer_all_li[7].find('a')['href']) +# userInfo['community']['pefectflacs'] = (communitycontainer_all_li[8].contents[0].string[16:len(communitycontainer_all_li[8].contents[0].string)-2],\ +# communitycontainer_all_li[8].find('a')['href']) +# userInfo['community']['seeding'] = (communitycontainer_all_li[9].contents[0].string[9:len(communitycontainer_all_li[9].contents[0].string)-2],\ +# communitycontainer_all_li[9].find('a')['href']) +# userInfo['community']['leeching'] = (communitycontainer_all_li[10].contents[0].string[10:len(communitycontainer_all_li[10].contents[0].string)-2],\ +# communitycontainer_all_li[10].find('a')['href']) +# #NB: there's a carriage return and white spaces inside the snatched li tag +# userInfo['community']['snatched'] = (communitycontainer_all_li[11].contents[0].string[10:len(communitycontainer_all_li[11].contents[0].string)-7],\ +# communitycontainer_all_li[11].find('a')['href']) +# userInfo['community']['invited'] = (communitycontainer_all_li[12].contents[0].string[9:],\ +# None) +# userInfo['community']['artists'] = percentilecontainer_all_li[6]['title'] return userInfo @@ -2624,30 +1180,38 @@ class Parser(object): regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray') torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0) torrentInfo['torrent']['parentid'] = groupidurl[groupidurl.rfind("=") + 1:] - torrentInfo['torrent']['downloadurl'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a', {'title':'Download'})[0]['href'] - ## is freeleech or/and reported? ## - #both - if len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 4: - isreported = True - isfreeleech = True - torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0] - #either - elif len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 2: - if soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Reported': - isreported = True - elif soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Freeleech!': - isreported = True - torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0] - #none - else: - torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0] - torrentInfo['torrent']['isfreeleech'] = isfreeleech - torrentInfo['torrent']['isreported'] = isreported - torrentInfo['torrent']['size'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[1].string - torrentInfo['torrent']['snatched'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[2].string - torrentInfo['torrent']['seeders'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[3].string - torrentInfo['torrent']['leechers'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[4].string - torrentInfo['torrent']['uploadedby'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('a')[0].string + + all_tr_id_torrent = soup.findAll('tr', {'id': 'torrent%s' % id}) +# all_torrent_a = all_tr_id_torrent[0].findAll('a') + + torrentInfo['torrent']['downloadurl'] = all_tr_id_torrent[0].findAll('a', {'title':'Download'})[0]['href'] +# ## is freeleech or/and reported? ## +# #both +# if len(all_torrent_a[-1].contents) == 4: +# isreported = True +# isfreeleech = True +# torrentInfo['torrent']['details'] = all_torrent_a[-1].contents[0] +# #either +# elif len(all_torrent_a[-1].contents) == 2: +# if all_torrent_a[-1].contents[1].string == 'Reported': +# isreported = True +# elif all_torrent_a[-1].contents[1].string == 'Freeleech!': +# isreported = True +# torrentInfo['torrent']['details'] = all_torrent_a[-1].contents[0] +# #none +# else: +# torrentInfo['torrent']['details'] = all_torrent_a[-1].contents[0] +# torrentInfo['torrent']['isfreeleech'] = isfreeleech +# torrentInfo['torrent']['isreported'] = isreported + + all_torrent_td = all_tr_id_torrent[0].findAll('td') + torrentInfo['torrent']['size'] = all_torrent_td[1].string + torrentInfo['torrent']['snatched'] = all_torrent_td[2].string + torrentInfo['torrent']['seeders'] = all_torrent_td[3].string + torrentInfo['torrent']['leechers'] = all_torrent_td[4].string + + all_tr_id_torrent_underscore = soup.findAll('tr', {'id': 'torrent_%s' % id}) + torrentInfo['torrent']['uploadedby'] = all_tr_id_torrent_underscore[0].findAll('a')[0].string foldername = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('div')[1].string if(foldername is None): torrentInfo['torrent']['foldername'] = None @@ -2658,40 +1222,44 @@ class Parser(object): torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string)) torrentInfo['torrent']['filelist'] = torrentfiles #is there any description? - if len(soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')) > 1: - description = torrentInfo['torrent']['description'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')[1].contents - info = '' - for content in description: - if content.string: - info = "%s%s" % (info, self.utils._string(content.string)) - torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string)) - torrentInfo['torrent']['torrentdescription'] = torrentdescription +# all_torrent_blockquote = all_tr_id_torrent_underscore[0].findAll('blockquote') +# if len(all_torrent_blockquote) > 1: +# description = torrentInfo['torrent']['description'] = all_torrent_blockquote[1].contents +# info = '' +# for content in description: +# if content.string: +# info = "%s%s" % (info, self.utils._string(content.string)) +# torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string)) +# torrentInfo['torrent']['torrentdescription'] = torrentdescription regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown') torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0) torrentInfo['torrent']['comments'] = [] torrentInfo['torrent']['commentspages'] = 0 - if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0: - linkbox = dom.findAll("div", {"class": "linkbox"})[-1] - pages = 1 - postid = '' - userid = '' - post = '' - # if there's more than 1 page of torrents - if linkbox.find("a"): - # by default torrent page show last page of comments - lastpage = linkbox.findAll("a")[-1]['href'] - pages = int(lastpage[18:lastpage.find('&')]) + 1 - for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}): - postid = comment.find("a", {"class":"post_id"}).string[1:] - userid = comment.findAll("a")[1]['href'][12:] - username = comment.findAll("a")[1].string - post = comment.find("div", {"id":"content" + postid}) - post = u''.join([post.string for post in post.findAll(text=True)]) - torrentInfo['torrent']['comments'].append({"postid":postid, "post":post, "userid":userid, "username":username}) - - torrentInfo['torrent']['commentspages'] = pages + # if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0: + # linkbox = dom.findAll("div", {"class": "linkbox"})[-1] + # pages = 1 + # postid = '' + # userid = '' + # post = '' + # # if there's more than 1 page of torrents + # linkbox_all_a = linkbox.findAll("a") + # if len(linkbox_all_a): + # # by default torrent page show last page of comments + # lastpage = linkbox_all_a[-1]['href'] + # pages = int(lastpage[18:lastpage.find('&')]) + 1 + # for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}): + # postid = comment.find("a", {"class":"post_id"}).string[1:] + # + # all_comment_a = comment.findAll("a") + # userid = all_comment_a[1]['href'][12:] + # username = all_comment_a[1].string + # post = comment.find("div", {"id":"content" + postid}) + # post = u''.join([post.string for post in post.findAll(text=True)]) + # torrentInfo['torrent']['comments'].append({"postid":postid, "post":post, "userid":userid, "username":username}) + # + # torrentInfo['torrent']['commentspages'] = pages return torrentInfo @@ -2706,9 +1274,9 @@ class Parser(object): tagsartist = [] similarartists = [] soup = BeautifulSoup(str(dom)) - for releasetype in soup.fetch('table', {'class':'torrent_table grouped release_table'}): + for releasetype in soup.fetch('table', {'class': re.compile(r'\btorrent_table\b')}): releasetypenames = releasetype.findAll('strong') - releasetypename = releasetype.findAll('strong')[0].string + releasetypename = releasetypenames[0].string for release in releasetypenames[1:-1]: #skip release edition info and Freeleech! s if len(release.parent.contents) > 1 and len(release.contents) > 1: @@ -2720,37 +1288,45 @@ class Parser(object): 'year': releaseyear, 'name':self.utils.decodeHTMLEntities(releasename), 'id':releaseid}) artistInfo['releases'] = releases - #is there an artist image? - artistInfo['image'] = None - if soup.find('div', {'class':'box'}).find('img'): - artistInfo['image'] = soup.find('div', {'class':'box'}).find('img')['src'] - #is there any artist info? - contents = soup.find('div', {'class':'body'}).contents - if len(contents) > 0: - for content in contents: - if content.string: - infoartist = "%s%s" % (infoartist, self.utils._string(content.string)) - artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist) - #is there any artist tags? - if soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li'): - ul = soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li') - for li in ul: - if li.contents[0].string: - tagsartist.append(self.utils._string(li.contents[0].string)) - artistInfo['tags'] = tagsartist - #is there any similar artist? - if soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'}): - artists = soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'}) - for artist in artists: - if artist.contents[0].string: - similarartists.append(self.utils._string(artist.contents[0].string)) - artistInfo['similarartists'] = similarartists - #is there any request? - if soup.find('table', {'id':'requests'}): - for request in soup.find('table', {'id':'requests'}).findAll('tr', {'class':re.compile('row')}): - requests.append({'requestname':request.findAll('a')[1].string, 'id':request.findAll('a')[1]['href'][28:]}) - artistInfo['requests'] = requests + # This artist stuff wastes 10 secs + + #is there an artist image? +# artistInfo['image'] = None +# div_box = soup.find('div', {'class': 'box'}) +# if div_box.find('img'): +# artistInfo['image'] = div_box.find('img')['src'] +# #is there any artist info? +# contents = soup.find('div', {'class':'body'}).contents +# if len(contents) > 0: +# for content in contents: +# if content.string: +# infoartist = "%s%s" % (infoartist, self.utils._string(content.string)) +# artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist) + #is there any artist tags? +# all_ul_class_stats_nobullet = soup.findAll('ul', {'class': 'stats nobullet'}) +# all_ul_class_stats_nobullet_li = all_ul_class_stats_nobullet[0].findAll('li') +# if all_ul_class_stats_nobullet_li: +# ul = all_ul_class_stats_nobullet_li +# for li in ul: +# if li.contents[0].string: +# tagsartist.append(self.utils._string(li.contents[0].string)) +# artistInfo['tags'] = tagsartist + #is there any similar artist? +# if all_ul_class_stats_nobullet[2].findAll('span', {'title':'2'}): +# artists = all_ul_class_stats_nobullet[2].findAll('span', {'title':'2'}) +# for artist in artists: +# if artist.contents[0].string: +# similarartists.append(self.utils._string(artist.contents[0].string)) +# artistInfo['similarartists'] = similarartists + #is there any request? +# table_requests = soup.find('table', {'id': 'requests'}) +# if table_requests: +# for request in table_requests.findAll('tr', {'class':re.compile('row')}): +# request_all_a_1 = request.findAll('a')[1] +# requests.append({'requestname': request_all_a_1.string, 'id': request_all_a_1['href'][28:]}) +# +# artistInfo['requests'] = requests return artistInfo @@ -2783,8 +1359,9 @@ class Parser(object): if torrent.find('td').find('div')['class'][0:10] == 'cats_music': torrenttag = torrent.find('td').contents[1]['title'] - torrentdl = torrent.findAll('td')[1].find('span').findAll('a')[0]['href'] - torrentrm = torrent.findAll('td')[1].find('span').findAll('a')[1]['href'] + all_td_1_span_a = torrent.findAll('td')[1].find('span').findAll('a') + torrentdl = all_td_1_span_a[0]['href'] + torrentrm = all_td_1_span_a[1]['href'] torrentid = torrentrm[torrentrm.rfind('=') + 1:] torrenttd = torrent.findAll('td')[1] @@ -2800,36 +1377,38 @@ class Parser(object): isScene = False info = "" - if len(torrenttd.findAll('a')) == 2: + torrenttd_find_a = torrenttd.find("a") + torrenttd_all_a = torrenttd.findAll("a") + if len(torrenttd_all_a) == 2: #one artist - torrentartist = (self.utils.decodeHTMLEntities(torrenttd.find("a").string),) - artistid = (torrenttd.find("a")['href'][14:],) - torrentalbum = torrenttd.findAll("a")[1].string - info = torrenttd.findAll("a")[1].nextSibling.string.strip() + torrentartist = (self.utils.decodeHTMLEntities(torrenttd_find_a.string), ) + artistid = (torrenttd_find_a['href'][14:], ) + torrentalbum = torrenttd_all_a[1].string + info = torrenttd_all_a[1].nextSibling.string.strip() - elif len(torrenttd.findAll('a')) == 1: + elif len(torrenttd_all_a) == 1: #various artists - torrentartist = ('Various Artists',) + torrentartist = ('Various Artists', ) artistid = () - torrentalbum = torrenttd.find("a").string - info = torrenttd.find("a").nextSibling.string.strip() + torrentalbum = torrenttd_find_a.string + info = torrenttd_find_a.nextSibling.string.strip() - elif len(torrenttd.findAll('a')) == 3: + elif len(torrenttd_all_a) == 3: #two artists - torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[0].string),\ - self.utils.decodeHTMLEntities(torrenttd.findAll("a")[1].string)) - artistid = (torrenttd.findAll("a")[0]['href'][14:],\ - torrenttd.findAll("a")[1]['href'][14:]) - torrentalbum = torrenttd.findAll("a")[2].string - info = torrenttd.findAll("a")[2].nextSibling.string.strip() + torrentartist = (self.utils.decodeHTMLEntities(torrenttd_all_a[0].string),\ + self.utils.decodeHTMLEntities(torrenttd_all_a[1].string)) + artistid = (torrenttd_all_a[0]['href'][14:],\ + torrenttd_all_a[1]['href'][14:]) + torrentalbum = torrenttd_all_a[2].string + info = torrenttd_all_a[2].nextSibling.string.strip() elif torrenttd.find(text=re.compile('performed by')): #performed by - torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[-2].string),) - artistid = (torrenttd.findAll("a")[-2]['href'][14:],) - torrentalbum = torrenttd.findAll("a")[-1].string - info = torrenttd.findAll("a")[-1].nextSibling.string.strip() + torrentartist = (self.utils.decodeHTMLEntities(torrenttd_all_a[-2].string), ) + artistid = (torrenttd_all_a[-2]['href'][14:], ) + torrentalbum = torrenttd_all_a[-1].string + info = torrenttd_all_a[-1].nextSibling.string.strip() if 'Scene' in info: isScene = True