diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html
index 2f1f5619..bb5ca038 100644
--- a/data/interfaces/default/config.html
+++ b/data/interfaces/default/config.html
@@ -311,7 +311,7 @@ <%inherit file="base.html"/>
-
+
diff --git a/lib/whatapi.py b/lib/whatapi.py
index bc6f4394..b6f1b956 100755
--- a/lib/whatapi.py
+++ b/lib/whatapi.py
@@ -20,15 +20,15 @@
#################################################################################
-__author__="devilcius"
-__date__ ="$Oct 23, 2010 11:21:12 PM$"
+__author__ = "devilcius"
+__date__ = "$Oct 23, 2010 11:21:12 PM$"
import hashlib
try:
from BeautifulSoup import BeautifulSoup
except:
- raise ImportError,"Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download"
+ raise ImportError, "Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download"
import httplib
import os
import pickle
@@ -149,8 +149,8 @@ class WhatBase(object):
print "authenticating..."
self.whatcd.headers = Authenticate(self.whatcd).getAuthenticatedHeader()
- def _request(self,type, path, data, headers):
- return Request(self.whatcd,type,path,data,headers)
+ def _request(self, type, path, data, headers):
+ return Request(self.whatcd, type, path, data, headers)
def _parser(self):
return Parser(self.whatcd)
@@ -187,7 +187,7 @@ class Utils():
return text.encode("utf-8")
- def _number(self,string):
+ def _number(self, string):
"""
Extracts a number from a string. Returns 0 if None or an empty string is passed
"""
@@ -222,101 +222,101 @@ class Utils():
class WhatCD(object):
- def __init__(self, username, password, site, loginpage, headers):
+ def __init__(self, username, password, site, loginpage, headers):
- #credentials
- self.username = username
- self.password = password
- self.site = site
- self.loginpage = loginpage
- self.headers = headers
- self.authenticateduserinfo = {}
+ #credentials
+ self.username = username
+ self.password = password
+ self.site = site
+ self.loginpage = loginpage
+ self.headers = headers
+ self.authenticateduserinfo = {}
- self.cache_backend = None
- self.proxy_enabled = False
- self.proxy = None
+ self.cache_backend = None
+ self.proxy_enabled = False
+ self.proxy = None
- def isAuthenticated(self):
- """
+ def isAuthenticated(self):
+ """
Checks if we are authenticated in what.cd
"""
- if "id" in self.authenticateduserinfo:
- return True
- else:
- return False
+ if "id" in self.authenticateduserinfo:
+ return True
+ else:
+ return False
- def getCredentials(self):
- """
+ def getCredentials(self):
+ """
Returns an authenticated user credentials object
"""
- return Authenticate(self)
+ return Authenticate(self)
- def getUser(self, username):
- """
+ def getUser(self, username):
+ """
Returns a user object
"""
- return User(username, self)
+ return User(username, self)
- def getTorrent(self, id, page=1):
- """
+ def getTorrent(self, id, page=1):
+ """
Returns a torrent object
"""
- return Torrent(id, page, None, self)
+ return Torrent(id, page, None, self)
- def getTorrentGroup(self, id, page=1):
- """
+ def getTorrentGroup(self, id, page=1):
+ """
Returns a torrent group object
"""
- return Torrent(id, page, True, self)
+ return Torrent(id, page, True, self)
- def getArtist(self, name):
- """
+ def getArtist(self, name):
+ """
Returns an artist object
"""
- return Artist(name, self)
+ return Artist(name, self)
- def enableProxy(self, host, port):
- """Enable a default web proxy"""
- self.proxy = [host, Utils()._number(port)]
- self.proxy_enabled = True
+ def enableProxy(self, host, port):
+ """Enable a default web proxy"""
+ self.proxy = [host, Utils()._number(port)]
+ self.proxy_enabled = True
- def disableProxy(self):
- """Disable using the web proxy"""
- self.proxy_enabled = False
+ def disableProxy(self):
+ """Disable using the web proxy"""
+ self.proxy_enabled = False
- def isProxyEnabled(self):
- """Returns True if a web proxy is enabled."""
- return self.proxy_enabled
+ def isProxyEnabled(self):
+ """Returns True if a web proxy is enabled."""
+ return self.proxy_enabled
- def getProxy(self):
- """Returns proxy details."""
- return self.proxy
+ def getProxy(self):
+ """Returns proxy details."""
+ return self.proxy
- def enableCaching(self, file_path = None):
- """Enables caching request-wide for all cachable calls.
+ def enableCaching(self, file_path=None):
+ """Enables caching request-wide for all cachable calls.
* file_path: A file path for the backend storage file. If
None is set, a temporary file is created, according to the backend.
"""
- if not file_path:
- file_path = tempfile.mktemp(prefix="whatapi_tmp_")
+ if not file_path:
+ file_path = tempfile.mktemp(prefix="whatapi_tmp_")
- self.cache_backend = _ShelfCacheBackend(file_path)
+ self.cache_backend = _ShelfCacheBackend(file_path)
- def disableCaching(self):
- """Disables all caching features."""
- self.cache_backend = None
+ def disableCaching(self):
+ """Disables all caching features."""
+ self.cache_backend = None
- def isCachingEnabled(self):
- """Returns True if caching is enabled."""
+ def isCachingEnabled(self):
+ """Returns True if caching is enabled."""
- return not (self.cache_backend == None)
+ return not (self.cache_backend == None)
- def getCacheBackend(self):
+ def getCacheBackend(self):
- return self.cache_backend
+ return self.cache_backend
-def getWhatcdNetwork(username = "", password = ""):
+def getWhatcdNetwork(username="", password=""):
"""
Returns a preconfigured WhatCD object for what.cd
# Parameters:
@@ -325,21 +325,21 @@ def getWhatcdNetwork(username = "", password = ""):
"""
return WhatCD (
- username = username,
- password = password,
- site = "ssl.what.cd",
- loginpage = "/login.php",
- headers = {
- "Content-type": "application/x-www-form-urlencoded",
- 'Accept-Charset': 'utf-8',
- 'User-Agent': "whatapi [devilcius]"
- })
+ username=username,
+ password=password,
+ site="ssl.what.cd",
+ loginpage="/login.php",
+ headers={
+ "Content-type": "application/x-www-form-urlencoded",
+ 'Accept-Charset': 'utf-8',
+ 'User-Agent': "whatapi [devilcius]"
+ })
class _ShelfCacheBackend(object):
"""Used as a backend for caching cacheable requests."""
- def __init__(self, file_path = None):
+ def __init__(self, file_path=None):
self.shelf = shelve.open(file_path)
def getHTML(self, key):
@@ -355,7 +355,7 @@ class _ShelfCacheBackend(object):
class Request(object):
"""web service operation."""
- def __init__(self, whatcd,type, path, data, headers):
+ def __init__(self, whatcd, type, path, data, headers):
self.whatcd = whatcd
self.utils = Utils()
@@ -394,8 +394,8 @@ class Request(object):
rb = ResponseBody()
if self.whatcd.isProxyEnabled():
- conn = httplib.HTTPSConnection(host = self.whatcd.getProxy()[0], port = self.whatcd.getProxy()[1])
- conn.request(method = self.type, url="https://" + self.whatcd.site + self.path, body = self.data, headers = self.headers)
+ conn = httplib.HTTPSConnection(host=self.whatcd.getProxy()[0], port=self.whatcd.getProxy()[1])
+ conn.request(method=self.type, url="https://" + self.whatcd.site + self.path, body=self.data, headers=self.headers)
else:
conn.request(self.type, self.path, self.data, self.headers)
@@ -406,7 +406,7 @@ class Request(object):
conn.close()
return rb
- def execute(self, cacheable = False):
+ def execute(self, cacheable=False):
"""Depending if caching is enabled, returns response from the server or, if available, the cached response"""
if self.whatcd.isCachingEnabled() and cacheable:
response = self.getCachedResponse()
@@ -430,23 +430,23 @@ class Authenticate(WhatBase):
def setCookie(self):
print "creating cookie"
f = open('cookie', 'w')
- loginform= {'username': self.whatcd.username, 'password': self.whatcd.password \
- , 'keeplogged': '1', 'login': 'Login'}
+ loginform = {'username': self.whatcd.username, 'password': self.whatcd.password\
+ , 'keeplogged': '1', 'login': 'Login'}
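+        # 'keeplogged' asks the site for a persistent session, so the header
+        # pickled below stays valid between runs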
data = urllib.urlencode(loginform)
response = self._request("POST", self.whatcd.loginpage, data, self.whatcd.headers).execute(True)
try:
- cookie=dict(response.headers)['set-cookie']
- session=re.search("session=[^;]+", cookie).group(0)
+ cookie = dict(response.headers)['set-cookie']
+ session = re.search("session=[^;]+", cookie).group(0)
self.whatcd.headers["Cookie"] = session
homepage = response.body
pickle.dump(self.whatcd.headers, f)
except (KeyError, AttributeError):
+ print "Login failed, most likely bad creds or the site is down, nothing to do"
+ f.close()
os.remove('cookie')
self.whatcd.headers = None
-# quit() # Commented out...can't have this quitting headphones on us
- raise Exception("Login failed, most likely bad creds or the site is down, nothing to do")
- finally:
- f.close()
+# quit()
+ f.close()
def getAuthenticatedHeader(self):
@@ -459,18 +459,19 @@ class Authenticate(WhatBase):
try:
self.whatcd.headers = pickle.load(f)
except EOFError:
+ f.close()
os.remove("cookie")
print "invalid cookie, removed"
self.setCookie()
else:
self.setCookie()
- #set authenticated user info
+ #set authenticated user info
if 'id' not in self.whatcd.authenticateduserinfo:
self.whatcd.authenticateduserinfo = self.getAuthenticatedUserInfo(homepage)
return self.whatcd.headers
- def getAuthenticatedUserInfo(self, homepage = None):
+ def getAuthenticatedUserInfo(self, homepage=None):
"""
Returns authenticated user's info
"""
@@ -611,11 +612,11 @@ class User(WhatBase):
"""
if self.userid is None:
self.userid = self.getUserId()
- url = "/torrents.php?type=seeding&userid=%s&page=%d" % (self.userid,page)
+ url = "/torrents.php?type=seeding&userid=%s&page=%d" % (self.userid, page)
torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
return self._parser().torrentsList(torrentspage)
- def getTorrentsSnatched(self,page=1):
+ def getTorrentsSnatched(self, page=1):
"""
Returns a list with all the user's snatched music torrents
in the form of a dictionary {page(tuple with current and total), tag, dlurl, id,
@@ -624,7 +625,7 @@ class User(WhatBase):
"""
if self.userid is None:
self.userid = self.getUserId()
- url = "/torrents.php?type=snatched&userid=%s&page=%d" % (self.userid,page)
+ url = "/torrents.php?type=snatched&userid=%s&page=%d" % (self.userid, page)
torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
return self._parser().torrentsList(torrentspage)
@@ -637,7 +638,7 @@ class User(WhatBase):
"""
if self.userid is None:
self.userid = self.getUserId()
- url = "/torrents.php?type=uploaded&userid=%s&page=%d" % (self.userid,page)
+ url = "/torrents.php?type=uploaded&userid=%s&page=%d" % (self.userid, page)
torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
return self._parser().torrentsList(torrentspage)
@@ -651,7 +652,7 @@ class User(WhatBase):
if self.userid is None:
self.userid = self.getUserId()
- url = "/%s&page=%d" % (self.specificUserInfo().torrentscomments[1],page)
+ url = "/%s&page=%d" % (self.specificUserInfo().torrentscomments[1], page)
torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
return self._parser().postsList(torrentspage)
@@ -669,14 +670,14 @@ class User(WhatBase):
info = SpecificInformation()
# Initialize attributes
info.joindate, info.lastseen, info.dataup, info.datadown,\
- info.ratio, info.rratio,info.uppercentile,info.downpercentile, \
- info.torrentsuppercentile,info.reqfilledpercentile,info.bountyspentpercentile, \
- info.postsmadepercentile,info.artistsaddedpercentile,info.overallpercentile, \
- info.postsmadecom,info.torrentscommentscom,info.collagesstartedcom,info.collagescontrcon, \
- info.reqfilledcom,info.reqvotedcom,info.uploadedcom,info.uniquecom, info.perfectcom, \
- info.seedingcom, info.leechingcom,info.snatchedcom,info.invitedcom,info.artistsaddedcom \
- = (None,None, None, None,None,None,None,None,None,None,None,None,None, None,\
- None,None,None,None,None,None,None,None,None,None,None,None,None,None)
+ info.ratio, info.rratio, info.uppercentile, info.downpercentile,\
+ info.torrentsuppercentile, info.reqfilledpercentile, info.bountyspentpercentile,\
+ info.postsmadepercentile, info.artistsaddedpercentile, info.overallpercentile,\
+ info.postsmadecom, info.torrentscommentscom, info.collagesstartedcom, info.collagescontrcon,\
+ info.reqfilledcom, info.reqvotedcom, info.uploadedcom, info.uniquecom, info.perfectcom,\
+ info.seedingcom, info.leechingcom, info.snatchedcom, info.invitedcom, info.artistsaddedcom\
+ = (None, None, None, None, None, None, None, None, None, None, None, None, None, None,\
+ None, None, None, None, None, None, None, None, None, None, None, None, None, None)
if not self.userinfo and self.getInfo() is None:
@@ -686,7 +687,7 @@ class User(WhatBase):
info.joindate = self.userinfo['stats']['joined']
info.lastseen = self.userinfo['stats']['lastseen']
info.dataup = self.userinfo['stats']['uploaded']
- info.datadown = self.userinfo['stats']['downloaded']
+ info.datadown = self.userinfo['stats']['downloaded']
info.ratio = self.userinfo['stats']['ratio']
info.rratio = self.userinfo['stats']['rratio']
######## percentile ###########
@@ -740,7 +741,7 @@ class Torrent(WhatBase):
def getTorrentUrl(self):
"""
- Returns a dictionnary torrent's real URL
+        Returns the torrent's URL
"""
if self.isParent:
form = {'id': self.id, 'page':self.page}
@@ -750,17 +751,25 @@ class Torrent(WhatBase):
form = {'torrentid': self.id, 'page':self.page}
data = urllib.urlencode(form)
headers = self._request("GET", self.torrentpage + data, "", self.whatcd.headers).execute(True).headers
+
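+        # the site answers this lookup with a redirect to the torrent page; if
+        # no 'location' header comes back (e.g. an unknown id), the checks
+        # below return None instead of raising a KeyError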
if dict(headers) is None:
return None
else:
- return dict(headers)['location']
+ if 'location' not in dict(headers).keys():
+ return None
+ else:
+ return dict(headers)['location']
def getInfo(self):
"""
Returns a dictionary with the torrent's info
"""
- torrentpage = BeautifulSoup(self._request("GET", "/"+self.getTorrentUrl(), "", self.whatcd.headers).execute(True).body)
+ if self.getTorrentUrl() is None:
+ print "no torrent retrieved with such id"
+ return None
+
+ torrentpage = BeautifulSoup(self._request("GET", "/" + self.getTorrentUrl(), "", self.whatcd.headers).execute(True).body)
if 'Site log' in torrentpage.find("title").string:
print "no torrent retrieved with such id"
@@ -929,7 +938,7 @@ class Artist(WhatBase):
"""
form = {'artistname': self.name}
data = urllib.urlencode(form)
- headers = self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).headers
+ headers = self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).headers
if dict(headers)['location'][0:14] != 'artist.php?id=':
return None
else:
@@ -942,7 +951,7 @@ class Artist(WhatBase):
if self.getArtistId():
form = {'id': self.getArtistId()}
data = urllib.urlencode(form)
- artistpage = BeautifulSoup(self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).body)
+ artistpage = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body)
return self._parser().artistInfo(artistpage)
else:
print "no artist info retrieved"
@@ -993,11 +1002,11 @@ class Artist(WhatBase):
* info tuple: (The artist's info -str-, image url -str- (None if there isn't))
"""
if info[0]:
- params = {'action': 'edit','artistid':id}
+ params = {'action': 'edit', 'artistid':id}
data = urllib.urlencode(params)
- edit_page = BeautifulSoup(self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).body)
- what_form = self._parser().whatForm(edit_page,'edit')
+ edit_page = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body)
+ what_form = self._parser().whatForm(edit_page, 'edit')
if info[1]:
image_to_post = info[1]
else:
@@ -1010,433 +1019,1880 @@ class Artist(WhatBase):
'action':what_form['action']}
#post artist's info
- self.whatcd.headers['Content-type']="application/x-www-form-urlencoded"
+ self.whatcd.headers['Content-type'] = "application/x-www-form-urlencoded"
response = self._request("POST", self.artistpage, urllib.urlencode(data_to_post), self.whatcd.headers).execute(False)
artist_id_returned = dict(response.headers)['location'][14:]
- if str(artist_id_returned) == str(what_form['artistid']) :
+ if str(artist_id_returned) == str(what_form['artistid']):
return 1
else:
return 0
else:
- return 'no artist info provided. Aborting.'
- exit()
+ return 'no artist info provided. Aborting.'
+ exit()
class Parser(object):
- def __init__(self,whatcd):
- self.utils = Utils()
- self.whatcd = whatcd
- self.totalpages = 0
+ def __init__(self, whatcd):
+ self.utils = Utils()
+ self.whatcd = whatcd
+ self.totalpages = 0
- def authenticatedUserInfo(self, dom):
- """
- Parse the index page and returns a dictionnary with basic authenticated user information
- """
- userInfo = {}
- soup = BeautifulSoup(str(dom))
- for ul in soup.fetch('ul'):
- if ul["id"] == "userinfo_username":
- #retrieve user logged id
- hrefid = ul.findAll('li')[0].find("a")["href"]
- regid = re.compile('[0-9]+')
- if regid.search(hrefid) is None:
- self.debugMessage("not found href to retrieve user id")
- else:
- userInfo["id"] = regid.search(hrefid).group(0)
+ def authenticatedUserInfo(self, dom):
+ """
+        Parses the index page and returns a dictionary with basic authenticated user information
+ """
+ userInfo = {}
+ soup = BeautifulSoup(str(dom))
+ for ul in soup.fetch('ul'):
+ if ul["id"] == "userinfo_username":
+ #retrieve user logged id
+ hrefid = ul.findAll('li')[0].find("a")["href"]
+ regid = re.compile('[0-9]+')
+ if regid.search(hrefid) is None:
+ self.debugMessage("not found href to retrieve user id")
+ else:
+ userInfo["id"] = regid.search(hrefid).group(0)
- #retrieve user logged id
- hrefauth = ul.findAll('li')[2].find("a")["href"]
- regauth = re.compile('=[0-9a-fA-F]+')
- if regid.search(hrefid) is None:
- self.debugMessage("not found href to retrieve user id")
- else:
- userInfo["authcode"] = regauth.search(hrefauth).group(0)[1:]
+ #retrieve user logged id
+ hrefauth = ul.findAll('li')[2].find("a")["href"]
+ regauth = re.compile('=[0-9a-fA-F]+')
+ if regid.search(hrefid) is None:
+ self.debugMessage("not found href to retrieve user id")
+ else:
+ userInfo["authcode"] = regauth.search(hrefauth).group(0)[1:]
- elif ul["id"] == "userinfo_stats":
- if len(ul.findAll('li')) > 0:
- userInfo["uploaded"] = ul.findAll('li')[0].find("span").string
- userInfo["downloaded"] = ul.findAll('li')[1].find("span").string
- userInfo["ratio"] = ul.findAll('li')[2].findAll("span")[1].string
- userInfo["required"] = ul.findAll('li')[3].find("span").string
- userInfo["authenticate"] = True
+ elif ul["id"] == "userinfo_stats":
+ if len(ul.findAll('li')) > 0:
+ userInfo["uploaded"] = ul.findAll('li')[0].find("span").string
+ userInfo["downloaded"] = ul.findAll('li')[1].find("span").string
+ userInfo["ratio"] = ul.findAll('li')[2].findAll("span")[1].string
+ userInfo["required"] = ul.findAll('li')[3].find("span").string
+ userInfo["authenticate"] = True
- return userInfo
+ return userInfo
- def userInfo(self, dom, user):
- """
- Parse an user's page and returns a dictionnary with its information
+ def userInfo(self, dom, user):
+ """
+        Parses a user's page and returns a dictionary with its information
- # Parameters:
- * dom str: user page html
- * user str: what.cd username
- """
- userInfo = {'stats':{}, 'percentile':{}, 'community':{}}
- soup = BeautifulSoup(str(dom))
+ # Parameters:
+ * dom str: user page html
+ * user str: what.cd username
+ """
+ userInfo = {'stats':{}, 'percentile':{}, 'community':{}}
+ soup = BeautifulSoup(str(dom))
- for div in soup.fetch('div',{'class':'box'}):
+ for div in soup.fetch('div', {'class':'box'}):
- #if paronoia is not set to 'Off', stop collecting data
- if div.findAll('div')[0].string == "Personal":
- if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off":
- return None
+            #if paranoia is not set to 'Off', stop collecting data
+ if div.findAll('div')[0].string == "Personal":
+ if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off":
+ return None
- statscontainer = soup.findAll('div', {'class':'box'})[1]
- percentilecontainer = soup.findAll('div', {'class':'box'})[2]
- communitycontainer = soup.findAll('div', {'class':'box'})[4]
+ statscontainer = soup.findAll('div', {'class':'box'})[1]
+ percentilecontainer = soup.findAll('div', {'class':'box'})[2]
+ communitycontainer = soup.findAll('div', {'class':'box'})[4]
- userInfo['stats']['joined'] = statscontainer.findAll('li')[0].find('span')['title']
- userInfo['stats']['lastseen'] = statscontainer.findAll('li')[1].find('span')['title']
- userInfo['stats']['uploaded'] = statscontainer.findAll('li')[2].string[10:]
- userInfo['stats']['downloaded'] = statscontainer.findAll('li')[3].string[12:]
- userInfo['stats']['ratio'] = statscontainer.findAll('li')[4].find('span').string
- userInfo['stats']['rratio'] = statscontainer.findAll('li')[5].string[16:]
- userInfo['percentile']['dataup'] = percentilecontainer.findAll('li')[0].string[15:]
- userInfo['percentile']['datadown'] = percentilecontainer.findAll('li')[1].string[17:]
- userInfo['percentile']['torrentsup'] = percentilecontainer.findAll('li')[2].string[19:]
- userInfo['percentile']['reqfilled'] = percentilecontainer.findAll('li')[3].string[17:]
- userInfo['percentile']['bountyspent'] = percentilecontainer.findAll('li')[4].string[14:]
- userInfo['percentile']['postsmade'] = percentilecontainer.findAll('li')[5].string[12:]
- userInfo['percentile']['artistsadded'] = percentilecontainer.findAll('li')[6].string[15:]
- userInfo['percentile']['overall'] = percentilecontainer.findAll('li')[7].find('strong').string[14:]
+ userInfo['stats']['joined'] = statscontainer.findAll('li')[0].find('span')['title']
+ userInfo['stats']['lastseen'] = statscontainer.findAll('li')[1].find('span')['title']
+ userInfo['stats']['uploaded'] = statscontainer.findAll('li')[2].string[10:]
+ userInfo['stats']['downloaded'] = statscontainer.findAll('li')[3].string[12:]
+ userInfo['stats']['ratio'] = statscontainer.findAll('li')[4].find('span').string
+ userInfo['stats']['rratio'] = statscontainer.findAll('li')[5].string[16:]
+ userInfo['percentile']['dataup'] = percentilecontainer.findAll('li')[0].string[15:]
+ userInfo['percentile']['datadown'] = percentilecontainer.findAll('li')[1].string[17:]
+ userInfo['percentile']['torrentsup'] = percentilecontainer.findAll('li')[2].string[19:]
+ userInfo['percentile']['reqfilled'] = percentilecontainer.findAll('li')[3].string[17:]
+ userInfo['percentile']['bountyspent'] = percentilecontainer.findAll('li')[4].string[14:]
+ userInfo['percentile']['postsmade'] = percentilecontainer.findAll('li')[5].string[12:]
+ userInfo['percentile']['artistsadded'] = percentilecontainer.findAll('li')[6].string[15:]
+ userInfo['percentile']['overall'] = percentilecontainer.findAll('li')[7].find('strong').string[14:]
- userInfo['community']['forumposts'] = (communitycontainer.findAll('li')[0].contents[0].string[13:len(communitycontainer.findAll('li')[0].contents[0].string)-2],\
- communitycontainer.findAll('li')[0].find('a')['href'])
- userInfo['community']['torrentscomments'] = (communitycontainer.findAll('li')[1].contents[0].string[18:len(communitycontainer.findAll('li')[1].contents[0].string)-2],\
- communitycontainer.findAll('li')[1].find('a')['href'])
- userInfo['community']['startedcollages'] = (communitycontainer.findAll('li')[2].contents[0].string[18:len(communitycontainer.findAll('li')[2].contents[0].string)-2],\
- communitycontainer.findAll('li')[2].find('a')['href'])
- userInfo['community']['contributedcollages'] = (communitycontainer.findAll('li')[3].contents[0].string[25:len(communitycontainer.findAll('li')[3].contents[0].string)-2],\
+ userInfo['community']['forumposts'] = (communitycontainer.findAll('li')[0].contents[0].string[13:len(communitycontainer.findAll('li')[0].contents[0].string)-2],\
+ communitycontainer.findAll('li')[0].find('a')['href'])
+ userInfo['community']['torrentscomments'] = (communitycontainer.findAll('li')[1].contents[0].string[18:len(communitycontainer.findAll('li')[1].contents[0].string)-2],\
+ communitycontainer.findAll('li')[1].find('a')['href'])
+ userInfo['community']['startedcollages'] = (communitycontainer.findAll('li')[2].contents[0].string[18:len(communitycontainer.findAll('li')[2].contents[0].string)-2],\
+ communitycontainer.findAll('li')[2].find('a')['href'])
+ userInfo['community']['contributedcollages'] = (communitycontainer.findAll('li')[3].contents[0].string[25:len(communitycontainer.findAll('li')[3].contents[0].string)-2],\
communitycontainer.findAll('li')[3].find('a')['href'])
- userInfo['community']['reqfilled'] = (communitycontainer.findAll('li')[4].contents[0].string[17:len(communitycontainer.findAll('li')[4].contents[0].string)-2],\
- communitycontainer.findAll('li')[4].find('a')['href'])
- userInfo['community']['reqvoted'] = (communitycontainer.findAll('li')[5].contents[0].string[16:len(communitycontainer.findAll('li')[5].contents[0].string)-2],\
- communitycontainer.findAll('li')[5].find('a')['href'])
- userInfo['community']['uploaded'] = (communitycontainer.findAll('li')[6].contents[0].string[10:len(communitycontainer.findAll('li')[6].contents[0].string)-2],\
- communitycontainer.findAll('li')[6].find('a')['href'])
- userInfo['community']['uniquegroups'] = (communitycontainer.findAll('li')[7].contents[0].string[15:len(communitycontainer.findAll('li')[7].contents[0].string)-2],\
- communitycontainer.findAll('li')[7].find('a')['href'])
- userInfo['community']['pefectflacs'] = (communitycontainer.findAll('li')[8].contents[0].string[16:len(communitycontainer.findAll('li')[8].contents[0].string)-2],\
- communitycontainer.findAll('li')[8].find('a')['href'])
- userInfo['community']['seeding'] = (communitycontainer.findAll('li')[9].contents[0].string[9:len(communitycontainer.findAll('li')[9].contents[0].string)-2],\
- communitycontainer.findAll('li')[9].find('a')['href'])
- userInfo['community']['leeching'] = (communitycontainer.findAll('li')[10].contents[0].string[10:len(communitycontainer.findAll('li')[10].contents[0].string)-2],\
- communitycontainer.findAll('li')[10].find('a')['href'])
- #NB: there's a carriage return and white spaces inside the snatched li tag
- userInfo['community']['snatched'] = (communitycontainer.findAll('li')[11].contents[0].string[10:len(communitycontainer.findAll('li')[11].contents[0].string)-7],\
- communitycontainer.findAll('li')[11].find('a')['href'])
- userInfo['community']['invited'] = (communitycontainer.findAll('li')[12].contents[0].string[9:],\
- None)
- userInfo['community']['artists'] = percentilecontainer.findAll('li')[6]['title']
+ userInfo['community']['reqfilled'] = (communitycontainer.findAll('li')[4].contents[0].string[17:len(communitycontainer.findAll('li')[4].contents[0].string)-2],\
+ communitycontainer.findAll('li')[4].find('a')['href'])
+ userInfo['community']['reqvoted'] = (communitycontainer.findAll('li')[5].contents[0].string[16:len(communitycontainer.findAll('li')[5].contents[0].string)-2],\
+ communitycontainer.findAll('li')[5].find('a')['href'])
+ userInfo['community']['uploaded'] = (communitycontainer.findAll('li')[6].contents[0].string[10:len(communitycontainer.findAll('li')[6].contents[0].string)-2],\
+ communitycontainer.findAll('li')[6].find('a')['href'])
+ userInfo['community']['uniquegroups'] = (communitycontainer.findAll('li')[7].contents[0].string[15:len(communitycontainer.findAll('li')[7].contents[0].string)-2],\
+ communitycontainer.findAll('li')[7].find('a')['href'])
+ userInfo['community']['pefectflacs'] = (communitycontainer.findAll('li')[8].contents[0].string[16:len(communitycontainer.findAll('li')[8].contents[0].string)-2],\
+ communitycontainer.findAll('li')[8].find('a')['href'])
+ userInfo['community']['seeding'] = (communitycontainer.findAll('li')[9].contents[0].string[9:len(communitycontainer.findAll('li')[9].contents[0].string)-2],\
+ communitycontainer.findAll('li')[9].find('a')['href'])
+ userInfo['community']['leeching'] = (communitycontainer.findAll('li')[10].contents[0].string[10:len(communitycontainer.findAll('li')[10].contents[0].string)-2],\
+ communitycontainer.findAll('li')[10].find('a')['href'])
+ #NB: there's a carriage return and white spaces inside the snatched li tag
+ userInfo['community']['snatched'] = (communitycontainer.findAll('li')[11].contents[0].string[10:len(communitycontainer.findAll('li')[11].contents[0].string)-7],\
+ communitycontainer.findAll('li')[11].find('a')['href'])
+ userInfo['community']['invited'] = (communitycontainer.findAll('li')[12].contents[0].string[9:],\
+ None)
+ userInfo['community']['artists'] = percentilecontainer.findAll('li')[6]['title']
- return userInfo
+ return userInfo
- def torrentInfo(self, dom, id, isparent):
- """
- Parse a torrent's page and returns a dictionnary with its information
- """
+ def torrentInfo(self, dom, id, isparent):
+ """
+        Parses a torrent's page and returns a dictionary with its information
+ """
- torrentInfo = {'torrent':{}}
- torrentfiles = []
- torrentdescription = ""
- isreported = False
- isfreeleech = False
- soup = BeautifulSoup(str(dom))
- if isparent:
- torrentInfo['torrent']['parentid'] = id
- else:
- groupidurl = soup.findAll('div', {'class':'linkbox'})[0].find('a')['href']
- torrentInfo['torrent']['editioninfo'] = soup.findAll('td', {'class':'edition_info'})[0].find('strong').contents[-1]
- regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray')
- torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0)
- torrentInfo['torrent']['parentid'] = groupidurl[groupidurl.rfind("=")+1:]
- torrentInfo['torrent']['downloadurl'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a',{'title':'Download'})[0]['href']
- ## is freeleech or/and reported? ##
- #both
- if len(soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents) == 4:
+ torrentInfo = {'torrent':{}}
+ torrentfiles = []
+ torrentdescription = ""
+ isreported = False
+ isfreeleech = False
+ soup = BeautifulSoup(str(dom))
+ if isparent:
+ torrentInfo['torrent']['parentid'] = id
+ else:
+ groupidurl = soup.findAll('div', {'class':'linkbox'})[0].find('a')['href']
+ torrentInfo['torrent']['editioninfo'] = soup.findAll('td', {'class':'edition_info'})[0].find('strong').contents[-1]
+ regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray')
+ torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0)
+ torrentInfo['torrent']['parentid'] = groupidurl[groupidurl.rfind("=") + 1:]
+ torrentInfo['torrent']['downloadurl'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a', {'title':'Download'})[0]['href']
+ ## is freeleech or/and reported? ##
+ #both
+ if len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 4:
+ isreported = True
+ isfreeleech = True
+ torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0]
+ #either
+ elif len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 2:
+ if soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Reported':
isreported = True
- isfreeleech = True
- torrentInfo['torrent']['details'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[0]
- #either
- elif len(soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents) == 2:
- if soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[1].string == 'Reported':
- isreported = True
- elif soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[1].string == 'Freeleech!':
- isreported = True
- torrentInfo['torrent']['details'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[0]
- #none
- else:
- torrentInfo['torrent']['details'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[0]
- torrentInfo['torrent']['isfreeleech'] = isfreeleech
- torrentInfo['torrent']['isreported'] = isreported
- torrentInfo['torrent']['size'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[1].string
- torrentInfo['torrent']['snatched'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[2].string
- torrentInfo['torrent']['seeders'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[3].string
- torrentInfo['torrent']['leechers'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[4].string
- torrentInfo['torrent']['uploadedby'] = soup.findAll('tr',{'id':'torrent_%s'%id})[0].findAll('a')[0].string
- foldername = soup.findAll('div',{'id':'files_%s'%id})[0].findAll('div')[1].string
- if(foldername is None):
- torrentInfo['torrent']['foldername'] = None
- else:
- torrentInfo['torrent']['foldername'] = self.utils.decodeHTMLEntities(foldername)
- files = soup.findAll('div',{'id':'files_%s'%id})[0].findAll('tr')
- for file in files[1:-1]:
- torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string))
- torrentInfo['torrent']['filelist'] = torrentfiles
- #is there any description?
- if len(soup.findAll('tr',{'id':'torrent_%s'%id})[0].findAll('blockquote')) > 1:
- description = torrentInfo['torrent']['description'] = soup.findAll('tr',{'id':'torrent_%s'%id})[0].findAll('blockquote')[1].contents
- info = ''
- for content in description:
- if content.string:
- info = "%s%s" % (info, self.utils._string(content.string))
- torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string))
- torrentInfo['torrent']['torrentdescription'] = torrentdescription
- regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown')
- torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0)
-
- torrentInfo['torrent']['comments'] = []
- torrentInfo['torrent']['commentspages'] = 0
-
- if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0:
- linkbox = dom.findAll("div", {"class": "linkbox"})[-1]
- pages = 1
- postid = ''
- userid = ''
- post = ''
- # if there's more than 1 page of torrents
- if linkbox.find("a"):
- # by default torrent page show last page of comments
- lastpage = linkbox.findAll("a")[-1]['href']
- pages = int(lastpage[18:lastpage.find('&')]) +1
- for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}):
- postid = comment.find("a",{"class":"post_id"}).string[1:]
- userid = comment.findAll("a")[1]['href'][12:]
- username = comment.findAll("a")[1].string
- post = comment.find("div", {"id":"content"+postid})
- post = u''.join([post.string for post in post.findAll(text=True)])
- torrentInfo['torrent']['comments'].append({"postid":postid,"post":post,"userid":userid,"username":username})
-
- torrentInfo['torrent']['commentspages'] = pages
-
- return torrentInfo
-
- def artistInfo(self, dom):
- """
- Parse an artist's page and returns a dictionnary with its information
- """
- artistInfo = {}
- releases = []
- requests = []
- infoartist = ""
- tagsartist = []
- similarartists = []
- soup = BeautifulSoup(str(dom))
- soupfetch = soup.fetch('table',{'class':'torrent_table'})
- if not soupfetch:
- soupfetch = soup.fetch('table',{'class':'torrent_table grouped release_table'})
- for releasetype in soupfetch:
- releasetypenames = releasetype.findAll('strong')
- releasetypename = releasetype.findAll('strong')[0].string
- for release in releasetypenames[1:-1]:
- #skip release edition info and Freeleech! s
- if len(release.parent.contents) > 1 and len(release.contents) > 1 :
- releaseyear = release.contents[0][0:4]
- releasename = release.contents[1].string
- releasehref = release.contents[1]['href']
- releaseid = releasehref[releasehref.rfind('=')+1:]
- releases.append({'releasetype':releasetypename,\
- 'year': releaseyear,'name':self.utils.decodeHTMLEntities(releasename),'id':releaseid})
-
- artistInfo['releases'] = releases
- #is there an artist image?
- artistInfo['image'] = None
- if soup.find('div', {'class':'box'}).find('img'):
- artistInfo['image'] = soup.find('div', {'class':'box'}).find('img')['src']
- #is there any artist info?
- contents = soup.find('div', {'class':'body'}).contents
- if len(contents) > 0:
- for content in contents:
+ elif soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Freeleech!':
+ isreported = True
+ torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0]
+ #none
+ else:
+ torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0]
+ torrentInfo['torrent']['isfreeleech'] = isfreeleech
+ torrentInfo['torrent']['isreported'] = isreported
+ torrentInfo['torrent']['size'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[1].string
+ torrentInfo['torrent']['snatched'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[2].string
+ torrentInfo['torrent']['seeders'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[3].string
+ torrentInfo['torrent']['leechers'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[4].string
+ torrentInfo['torrent']['uploadedby'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('a')[0].string
+ foldername = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('div')[1].string
+ if(foldername is None):
+ torrentInfo['torrent']['foldername'] = None
+ else:
+ torrentInfo['torrent']['foldername'] = self.utils.decodeHTMLEntities(foldername)
+ files = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('tr')
+ for file in files[1:-1]:
+ torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string))
+ torrentInfo['torrent']['filelist'] = torrentfiles
+ #is there any description?
+ if len(soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')) > 1:
+ description = torrentInfo['torrent']['description'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')[1].contents
+ info = ''
+ for content in description:
if content.string:
- infoartist = "%s%s" % (infoartist, self.utils._string(content.string))
- artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist)
- #is there any artist tags?
- if soup.findAll('ul',{'class':'stats nobullet'})[0].findAll('li'):
- ul = soup.findAll('ul',{'class':'stats nobullet'})[0].findAll('li')
- for li in ul:
- if li.contents[0].string:
- tagsartist.append(self.utils._string(li.contents[0].string))
- artistInfo['tags'] = tagsartist
- #is there any similar artist?
- if soup.findAll('ul',{'class':'stats nobullet'})[2].findAll('span',{'title':'2'}):
- artists = soup.findAll('ul',{'class':'stats nobullet'})[2].findAll('span',{'title':'2'})
- for artist in artists:
- if artist.contents[0].string:
- similarartists.append(self.utils._string(artist.contents[0].string))
- artistInfo['similarartists'] = similarartists
- #is there any request?
- if soup.find('table',{'id':'requests'}):
- for request in soup.find('table',{'id':'requests'}).findAll('tr',{'class':re.compile('row')}):
- requests.append({'requestname':request.findAll('a')[1].string,'id':request.findAll('a')[1]['href'][28:]})
+ info = "%s%s" % (info, self.utils._string(content.string))
+ torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string))
+ torrentInfo['torrent']['torrentdescription'] = torrentdescription
+ regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown')
+ torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0)
- artistInfo['requests'] = requests
+ torrentInfo['torrent']['comments'] = []
+ torrentInfo['torrent']['commentspages'] = 0
- return artistInfo
+ if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0:
+ linkbox = dom.findAll("div", {"class": "linkbox"})[-1]
+ pages = 1
+ postid = ''
+ userid = ''
+ post = ''
+ # if there's more than 1 page of torrents
+ if linkbox.find("a"):
+                    # by default the torrent page shows the last page of comments
+ lastpage = linkbox.findAll("a")[-1]['href']
+ pages = int(lastpage[18:lastpage.find('&')]) + 1
+ for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}):
+ postid = comment.find("a", {"class":"post_id"}).string[1:]
+ userid = comment.findAll("a")[1]['href'][12:]
+ username = comment.findAll("a")[1].string
+ post = comment.find("div", {"id":"content" + postid})
+ post = u''.join([post.string for post in post.findAll(text=True)])
+ torrentInfo['torrent']['comments'].append({"postid":postid, "post":post, "userid":userid, "username":username})
- def torrentsList(self,dom):
- """
- Parse a torrent's list page and returns a dictionnary with its information
- """
- torrentslist = []
- torrentssoup = dom.find("table", {"width": "100%"})
- pages = 0
+ torrentInfo['torrent']['commentspages'] = pages
- #if there's at least 1 torrent in the list
- if torrentssoup:
- navsoup = dom.find("div", {"class": "linkbox"})
- pages = 1
- regyear = re.compile('\[\d{4}\]')
+ return torrentInfo
- #is there a page navigation bar?
- if navsoup.contents:
- #if there's more than 1 page of torrents
- if navsoup.contents[-1].has_key('href'):
- lastpage = navsoup.contents[-1]['href']
- pages = lastpage[18:lastpage.find('&')]
- self.totalpages = pages
- else: #we are at the last page, no href
- pages = self.totalpages+1
- #fetch all tr except first one (column head)
- for torrent in torrentssoup.fetch('tr')[1:]:
- #exclude non music torrents
- if torrent.find('td').find('div')['class'][0:10] == 'cats_music':
+ def artistInfo(self, dom):
+ """
+        Parses an artist's page and returns a dictionary with its information
+ """
+ artistInfo = {}
+ releases = []
+ requests = []
+ infoartist = ""
+ tagsartist = []
+ similarartists = []
+ soup = BeautifulSoup(str(dom))
+ for releasetype in soup.fetch('table', {'class':'torrent_table'}):
+ releasetypenames = releasetype.findAll('strong')
+ releasetypename = releasetype.findAll('strong')[0].string
+ for release in releasetypenames[1:-1]:
+                #skip release edition info and Freeleech! markers
+ if len(release.parent.contents) > 1 and len(release.contents) > 1:
+ releaseyear = release.contents[0][0:4]
+ releasename = release.contents[1].string
+ releasehref = release.contents[1]['href']
+ releaseid = releasehref[releasehref.rfind('=') + 1:]
+ releases.append({'releasetype':releasetypename,\
+ 'year': releaseyear, 'name':self.utils.decodeHTMLEntities(releasename), 'id':releaseid})
- torrenttag = torrent.find('td').contents[1]['title']
- torrentdl = torrent.findAll('td')[1].find('span').findAll('a')[0]['href']
- torrentrm = torrent.findAll('td')[1].find('span').findAll('a')[1]['href']
- torrentid = torrentrm[torrentrm.rfind('=')+1:]
- torrenttd = torrent.findAll('td')[1]
+ artistInfo['releases'] = releases
+ #is there an artist image?
+ artistInfo['image'] = None
+ if soup.find('div', {'class':'box'}).find('img'):
+ artistInfo['image'] = soup.find('div', {'class':'box'}).find('img')['src']
+ #is there any artist info?
+ contents = soup.find('div', {'class':'body'}).contents
+ if len(contents) > 0:
+ for content in contents:
+ if content.string:
+ infoartist = "%s%s" % (infoartist, self.utils._string(content.string))
+ artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist)
+        #are there any artist tags?
+ if soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li'):
+ ul = soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li')
+ for li in ul:
+ if li.contents[0].string:
+ tagsartist.append(self.utils._string(li.contents[0].string))
+ artistInfo['tags'] = tagsartist
+        #are there any similar artists?
+ if soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'}):
+ artists = soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'})
+ for artist in artists:
+ if artist.contents[0].string:
+ similarartists.append(self.utils._string(artist.contents[0].string))
+ artistInfo['similarartists'] = similarartists
+        #are there any requests?
+ if soup.find('table', {'id':'requests'}):
+ for request in soup.find('table', {'id':'requests'}).findAll('tr', {'class':re.compile('row')}):
+ requests.append({'requestname':request.findAll('a')[1].string, 'id':request.findAll('a')[1]['href'][28:]})
- # remove dataless elements
- torrenttags = torrenttd.div
- rightlinks = torrenttd.span
- torrenttags.extract()
- rightlinks.extract()
+ artistInfo['requests'] = requests
- # remove line breaks
- torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")])
- torrenttd = BeautifulSoup(torrenttd)
- isScene = False
- info = ""
+ return artistInfo
- if len(torrenttd.findAll('a')) == 2:
- #one artist
- torrentartist = (self.utils.decodeHTMLEntities(torrenttd.find("a").string),)
- artistid = (torrenttd.find("a")['href'][14:],)
- torrentalbum = torrenttd.findAll("a")[1].string
- info = torrenttd.findAll("a")[1].nextSibling.string.strip()
+ def torrentsList(self, dom):
+ """
+        Parses a torrent list page and returns a dictionary with its information
+ """
+ torrentslist = []
+ torrentssoup = dom.find("table", {"width": "100%"})
+ pages = 0
+ #if there's at least 1 torrent in the list
+ if torrentssoup:
+ navsoup = dom.find("div", {"class": "linkbox"})
+ pages = 1
+ regyear = re.compile('\[\d{4}\]')
- elif len(torrenttd.findAll('a')) == 1:
- #various artists
- torrentartist = ('Various Artists',)
- artistid = ()
- torrentalbum = torrenttd.find("a").string
- info = torrenttd.find("a").nextSibling.string.strip()
-
- elif len(torrenttd.findAll('a')) == 3:
- #two artists
- torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[0].string), \
- self.utils.decodeHTMLEntities(torrenttd.findAll("a")[1].string))
- artistid = (torrenttd.findAll("a")[0]['href'][14:],\
- torrenttd.findAll("a")[1]['href'][14:])
- torrentalbum = torrenttd.findAll("a")[2].string
- info = torrenttd.findAll("a")[2].nextSibling.string.strip()
-
- elif torrenttd.find(text=re.compile('performed by')):
- #performed by
- torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[-2].string),)
- artistid = (torrenttd.findAll("a")[-2]['href'][14:],)
- torrentalbum = torrenttd.findAll("a")[-1].string
- info = torrenttd.findAll("a")[-1].nextSibling.string.strip()
-
- if 'Scene' in info:
- isScene = True
-
- torrentyear = regyear.search(info).group(0)[1:5]
- torrentslist.append({'tag':torrenttag,\
- 'dlurl':torrentdl,\
- 'id':torrentid, \
- 'artist':torrentartist,\
- 'artistid':artistid,\
- 'album':self.utils.decodeHTMLEntities(torrentalbum),
- 'year':torrentyear,
- 'pages':pages,
- 'scene':isScene})
-
- return torrentslist
-
- def postsList(self,dom):
- """
- Parse a post list page and returns a dictionnary with each post information:
- {torrentid, commentid, postid}
- """
- postslist = []
- postssoup = dom.find("div", {"class": "thin"})
- pages = 0
-
- #if there's at least 1 post in the list
- if postssoup:
- navsoup = dom.find("div", {"class": "linkbox"})
-
+ #is there a page navigation bar?
+ if navsoup.contents:
#if there's more than 1 page of torrents
- if navsoup.find("a"):
- lastpage = navsoup.findAll("a")[1]['href']
+ if navsoup.contents[-1].has_key('href'):
+ lastpage = navsoup.contents[-1]['href']
pages = lastpage[18:lastpage.find('&')]
self.totalpages = pages
- else: #we are at the last page, no link
- pages = 1
+ else: #we are at the last page, no href
+ pages = self.totalpages + 1
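+            #(the pager's last element carries an href only when further pages
+            # exist; on the final page we reuse the total remembered earlier)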
+ #fetch all tr except first one (column head)
+ for torrent in torrentssoup.fetch('tr')[1:]:
+ #exclude non music torrents
+ if torrent.find('td').find('div')['class'][0:10] == 'cats_music':
- for post in postssoup.fetch('table', {'class':'forum_post box vertical_margin'}):
- commentbody = post.find("td", {"class":"body"})
- postid = post.find("span").findAll("a")[0].string[1:]
- torrentid = post.find("span").findAll("a")[-1]['href'][post.find("span").findAll("a")[-1]['href'].rfind('=')+1:]
- comment = u''.join([commentbody.string for commentbody in commentbody.findAll(text=True)])
- postdate = post.find("span", {"class":"time"})['title']
- postslist.append({'postid':postid,\
- 'torrentid':torrentid,\
- 'comment':comment,\
- 'postdate':postdate,\
- 'pages':pages})
+ torrenttag = torrent.find('td').contents[1]['title']
+ torrentdl = torrent.findAll('td')[1].find('span').findAll('a')[0]['href']
+ torrentrm = torrent.findAll('td')[1].find('span').findAll('a')[1]['href']
+ torrentid = torrentrm[torrentrm.rfind('=') + 1:]
+ torrenttd = torrent.findAll('td')[1]
+
+ # remove dataless elements
+ torrenttags = torrenttd.div
+ rightlinks = torrenttd.span
+ torrenttags.extract()
+ rightlinks.extract()
+
+ # remove line breaks
+ torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")])
+ torrenttd = BeautifulSoup(torrenttd)
+ isScene = False
+ info = ""
+
+ if len(torrenttd.findAll('a')) == 2:
+ #one artist
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.find("a").string),)
+ artistid = (torrenttd.find("a")['href'][14:],)
+ torrentalbum = torrenttd.findAll("a")[1].string
+ info = torrenttd.findAll("a")[1].nextSibling.string.strip()
- return postslist
+ elif len(torrenttd.findAll('a')) == 1:
+ #various artists
+ torrentartist = ('Various Artists',)
+ artistid = ()
+ torrentalbum = torrenttd.find("a").string
+ info = torrenttd.find("a").nextSibling.string.strip()
+
+ elif len(torrenttd.findAll('a')) == 3:
+ #two artists
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[0].string),\
+ self.utils.decodeHTMLEntities(torrenttd.findAll("a")[1].string))
+ artistid = (torrenttd.findAll("a")[0]['href'][14:],\
+ torrenttd.findAll("a")[1]['href'][14:])
+ torrentalbum = torrenttd.findAll("a")[2].string
+ info = torrenttd.findAll("a")[2].nextSibling.string.strip()
+
+ elif torrenttd.find(text=re.compile('performed by')):
+ #performed by
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[-2].string),)
+ artistid = (torrenttd.findAll("a")[-2]['href'][14:],)
+ torrentalbum = torrenttd.findAll("a")[-1].string
+ info = torrenttd.findAll("a")[-1].nextSibling.string.strip()
+
+ if 'Scene' in info:
+ isScene = True
+
+ torrentyear = regyear.search(info).group(0)[1:5]
+ torrentslist.append({'tag':torrenttag,\
+ 'dlurl':torrentdl,\
+ 'id':torrentid,\
+ 'artist':torrentartist,\
+ 'artistid':artistid,\
+ 'album':self.utils.decodeHTMLEntities(torrentalbum),
+ 'year':torrentyear,
+ 'pages':pages,
+ 'scene':isScene})
+
+ return torrentslist
+
+ def postsList(self, dom):
+ """
+        Parses a post list page and returns a dictionary with each post's information:
+ {torrentid, commentid, postid}
+ """
+ postslist = []
+ postssoup = dom.find("div", {"class": "thin"})
+ pages = 0
+
+ #if there's at least 1 post in the list
+ if postssoup:
+ navsoup = dom.find("div", {"class": "linkbox"})
+
+ #if there's more than 1 page of torrents
+ if navsoup.find("a"):
+ lastpage = navsoup.findAll("a")[1]['href']
+ pages = lastpage[18:lastpage.find('&')]
+ self.totalpages = pages
+ else: #we are at the last page, no link
+ pages = 1
+
+ for post in postssoup.fetch('table', {'class':'forum_post box vertical_margin'}):
+ commentbody = post.find("td", {"class":"body"})
+ postid = post.find("span").findAll("a")[0].string[1:]
+ torrentid = post.find("span").findAll("a")[-1]['href'][post.find("span").findAll("a")[-1]['href'].rfind('=') + 1:]
+ comment = u''.join([commentbody.string for commentbody in commentbody.findAll(text=True)])
+ postdate = post.find("span", {"class":"time"})['title']
+ postslist.append({'postid':postid,\
+ 'torrentid':torrentid,\
+ 'comment':comment,\
+ 'postdate':postdate,\
+ 'pages':pages})
- def whatForm(self, dom, action):
- """
- Parse a what.cd edit page and returns a dict with all form inputs/textareas names and values
- # Parameters:
- * dom str: the edit page dom.
- + action str: the action value from the requested form
- """
- inputs = {}
+ return postslist
- form = dom.find('input',{'name':'action','value':action}).parent
- elements = form.fetch(('input','textarea'))
- #get all form elements except for submit input
- for element in elements[0:-1]:
- name = element.get('name',None)
- if element.name == 'textarea':
- inputs[name] = element.string
- else:
- inputs[name] = element.get('value',None)
- return inputs
+
+ def whatForm(self, dom, action):
+ """
+        Parses a what.cd edit page and returns a dict with all form input/textarea names and values
+ # Parameters:
+ * dom str: the edit page dom.
+        * action str: the action value from the requested form
+ """
+ inputs = {}
+
+ form = dom.find('input', {'name':'action', 'value':action}).parent
+ elements = form.fetch(('input', 'textarea'))
+        #use only the first three form elements (the submit input and any
+        #trailing elements are skipped)
+        for element in elements[0:3]:
+ name = element.get('name', None)
+ if element.name == 'textarea':
+ inputs[name] = element.string
+ else:
+ inputs[name] = element.get('value', None)
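+        # illustrative sketch of the result for the artist edit form:
+        # {'action': 'edit', 'artistid': '123', ...} ('action' and 'artistid'
+        # are the keys setArtistInfo uses; other keys depend on the form)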
+ return inputs
if __name__ == "__main__":
- print "Module to manage what.cd as a web service"
+ print "Module to manage what.cd as a web service"
+# -*- coding: utf_8 -*-
+#################################################################################
+#
+# Name: whatapi.py
+#
+# Synopsis: Module to manage what.cd as a web service
+#
+# Description: See below list of the implemented webservices
+#
+# Copyright 2010 devilcius
+#
+# The Wide Open License (WOL)
+#
+# Permission to use, copy, modify, distribute and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice and this license appear in all source copies.
+# THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
+# ANY KIND. See http://www.dspguru.com/wide-open-license for more information.
+#
+#################################################################################
+
+
+__author__ = "devilcius"
+__date__ = "$Oct 23, 2010 11:21:12 PM$"
+
+
+import hashlib
+try:
+ from BeautifulSoup import BeautifulSoup
+except:
+ raise ImportError, "Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download"
+import httplib
+import os
+import pickle
+import re
+import urllib
+import shelve
+import tempfile
+from htmlentitydefs import name2codepoint as n2cp
+
+
+"""
+A list of the implemented webservices (from what.cd)
+=====================================
+
+# User
+
+ * user.getUserId
+ * user.getInfo
+
+ * user.getTorrentsSeeding
+ * user.getTorrentsSnatched
+ * user.getTorrentsUploaded
+ * user.getTorrentsCommented
+
+ * user.specificUserInfo
+      Attributes:
+ ######## stats ###########
+ -joindate
+ -lastseen
+ -dataup
+ -datadown
+ -ratio
+ -rratio
+ ######## percentile ###########
+ -uppercentile
+ -downpercentile
+ -torrentsuppercentile
+ -reqfilledpercentile
+ -bountyspentpercentile
+ -postsmadepercentile
+ -artistsaddedpercentile
+ -overallpercentile
+ ######## community ###########
+ -postsmade
+ -torrentscomments
+ -collagesstarted
+ -collagescontr
+ -reqfilled
+ -reqvoted
+ -uploaded
+ -unique
+ -perfect
+ -seeding
+ -leeching
+ -snatched
+ -invited
+ -artistsadded
+
+
+# Artist
+
+ * artist.getArtistReleases
+ * artist.getArtistImage
+ * artist.getArtistInfo
+ * artist.getArtistTags
+ * artist.getArtistSimilar
+ * artist.getArtistRequests
+
+ + artist.setArtistInfo
+
+
+# Torrent
+
+ * torrent.getTorrentParentId
+ * torrent.getTorrentDownloadURL
+ * torrent.getTorrentDetails
+ * torrent.getTorrentSize
+ * torrent.getTorrentSnatched
+ * torrent.getTorrentSeeders
+ * torrent.getTorrentLeechers
+ * torrent.getTorrentUploadedBy
+ * torrent.getTorrentFolderName
+ * torrent.getTorrentFileList
+ * torrent.getTorrentDescription
+ * torrent.getTorrentComments
+ * torrent.isTorrentFreeLeech
+ * torrent.isTorrentReported
+
+
+# Authenticate
+
+ * authenticate.getAuthenticatedUserId
+ * authenticate.getAuthenticatedUserAuthCode
+ * authenticate.getAuthenticatedUserDownload
+    * authenticate.getAuthenticatedUserUpload
+ * authenticate.getAuthenticatedUserRatio
+ * authenticate.getAuthenticatedUserRequiredRatio
+
+"""
+
+class ResponseBody:
+ """A Response Body Object"""
+ pass
+
+class SpecificInformation:
+ """A Specific Information Object"""
+ pass
+
+
+class WhatBase(object):
+ """An abstract webservices object."""
+ whatcd = None
+
+ def __init__(self, whatcd):
+ self.whatcd = whatcd
+        #if we are not authenticated in what.cd, do it now
+ if not self.whatcd.isAuthenticated():
+ print "authenticating..."
+ self.whatcd.headers = Authenticate(self.whatcd).getAuthenticatedHeader()
+
+ def _request(self, type, path, data, headers):
+ return Request(self.whatcd, type, path, data, headers)
+
+ def _parser(self):
+ return Parser(self.whatcd)
+
+ def utils(self):
+ return Utils()
+
+
+class Utils():
+
+ def md5(self, text):
+ """Returns the md5 hash of a string."""
+
+ h = hashlib.md5()
+ h.update(self._string(text))
+
+ return h.hexdigest()
+
+ def _unicode(self, text):
+ if type(text) == unicode:
+ return text
+
+ if type(text) == int:
+ return unicode(text)
+
+ return unicode(text, "utf-8")
+
+ def _string(self, text):
+ if type(text) == str:
+ return text
+
+ if type(text) == int:
+ return str(text)
+
+ return text.encode("utf-8")
+
+ def _number(self, string):
+ """
+ Extracts an int from a string. Returns a 0 if None or an empty string was passed
+ """
+
+ if not string:
+ return 0
+ elif string == "":
+ return 0
+ else:
+ try:
+ return int(string)
+ except ValueError:
+ return float(string)
+
+ def substituteEntity(self, match):
+ ent = match.group(2)
+ if match.group(1) == "#":
+ return unichr(int(ent))
+ else:
+ cp = n2cp.get(ent)
+
+ if cp:
+ return unichr(cp)
+ else:
+ return match.group()
+
+ def decodeHTMLEntities(self, string):
+        entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
+ return entity_re.subn(self.substituteEntity, string)[0]
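+
+    # Hedged usage sketch for the helpers above (illustrative only; the inputs
+    # are made-up example strings, not values produced by the site):
+    #
+    #     u = Utils()
+    #     u.md5("some text")                           # -> hex digest string
+    #     u._number("1972")                            # -> 1972
+    #     u.decodeHTMLEntities("Caf&#233; &amp; Bar")  # -> u"Caf\xe9 & Bar"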
+
+
+
+class WhatCD(object):
+
+ def __init__(self, username, password, site, loginpage, headers):
+
+ #credentials
+ self.username = username
+ self.password = password
+ self.site = site
+ self.loginpage = loginpage
+ self.headers = headers
+ self.authenticateduserinfo = {}
+
+ self.cache_backend = None
+ self.proxy_enabled = False
+ self.proxy = None
+
+ def isAuthenticated(self):
+ """
+ Checks if we are authenticated in what.cd
+ """
+ if "id" in self.authenticateduserinfo:
+ return True
+ else:
+ return False
+
+ def getCredentials(self):
+ """
+ Returns an authenticated user credentials object
+ """
+ return Authenticate(self)
+
+
+ def getUser(self, username):
+ """
+        Returns a user object
+ """
+ return User(username, self)
+
+ def getTorrent(self, id, page=1):
+ """
+ Returns a torrent object
+ """
+ return Torrent(id, page, None, self)
+
+ def getTorrentGroup(self, id, page=1):
+ """
+        Returns a torrent group object
+ """
+ return Torrent(id, page, True, self)
+
+ def getArtist(self, name):
+ """
+ Returns an artist object
+ """
+ return Artist(name, self)
+
+ def enableProxy(self, host, port):
+ """Enable a default web proxy"""
+ self.proxy = [host, Utils()._number(port)]
+ self.proxy_enabled = True
+
+ def disableProxy(self):
+ """Disable using the web proxy"""
+ self.proxy_enabled = False
+
+ def isProxyEnabled(self):
+ """Returns True if a web proxy is enabled."""
+ return self.proxy_enabled
+
+ def getProxy(self):
+ """Returns proxy details."""
+ return self.proxy
+
+ def enableCaching(self, file_path=None):
+ """Enables caching request-wide for all cachable calls.
+        * file_path: A file path for the backend storage file. If
+        None is given, a temporary file is created by the backend.
+ """
+ if not file_path:
+ file_path = tempfile.mktemp(prefix="whatapi_tmp_")
+
+ self.cache_backend = _ShelfCacheBackend(file_path)
+
+ def disableCaching(self):
+ """Disables all caching features."""
+ self.cache_backend = None
+
+ def isCachingEnabled(self):
+ """Returns True if caching is enabled."""
+
+        return self.cache_backend is not None
+
+ def getCacheBackend(self):
+
+ return self.cache_backend
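+
+    # Hedged sketch of the optional proxy and caching switches above; the host,
+    # port and cache path are placeholders, not defaults shipped with this module:
+    #
+    #     whatcd.enableProxy("127.0.0.1", "8118")     # port is normalised via Utils()._number()
+    #     whatcd.enableCaching("/tmp/whatapi_cache")  # omit the path to use a temp file
+    #     whatcd.isCachingEnabled()                   # -> True
+    #     whatcd.disableCaching()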
+
+def getWhatcdNetwork(username="", password=""):
+ """
+ Returns a preconfigured WhatCD object for what.cd
+ # Parameters:
+ * username str: a username of a valid what.cd user
+ * password str: user's password
+ """
+
+    return WhatCD(
+ username=username,
+ password=password,
+ site="ssl.what.cd",
+ loginpage="/login.php",
+ headers={
+ "Content-type": "application/x-www-form-urlencoded",
+ 'Accept-Charset': 'utf-8',
+ 'User-Agent': "whatapi [devilcius]"
+ })
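+
+# Hedged example of obtaining a configured client and a couple of the objects it
+# exposes; the username/password and the artist name are placeholders:
+#
+#     whatcd = getWhatcdNetwork("myusername", "mypassword")
+#     user = whatcd.getUser("myusername")
+#     print user.getUserId()
+#     artist = whatcd.getArtist("Neil Young")
+#     print artist.getArtistTags()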
+
+
+
+class _ShelfCacheBackend(object):
+ """Used as a backend for caching cacheable requests."""
+ def __init__(self, file_path=None):
+ self.shelf = shelve.open(file_path)
+
+ def getHTML(self, key):
+ return self.shelf[key]
+
+ def setHTML(self, key, xml_string):
+ self.shelf[key] = xml_string
+
+ def hasKey(self, key):
+ return key in self.shelf.keys()
+
+
+class Request(object):
+ """web service operation."""
+
+ def __init__(self, whatcd, type, path, data, headers):
+
+ self.whatcd = whatcd
+ self.utils = Utils()
+ self.type = type
+ self.path = path
+ self.data = data
+ self.headers = headers
+        #enable caching?
+ if whatcd.isCachingEnabled():
+ self.cache = whatcd.getCacheBackend()
+
+ def getCacheKey(self):
+ """The cache key is a md5 hash of request params."""
+
+ key = self.type + self.path + self.data
+ return Utils().md5(key)
+
+ def getCachedResponse(self):
+ """Returns a file object of the cached response."""
+
+ if not self.isCached():
+ response = self.downloadResponse()
+ self.cache.setHTML(self.getCacheKey(), response)
+ return self.cache.getHTML(self.getCacheKey())
+
+ def isCached(self):
+ """Returns True if the request is already in cache."""
+
+ return self.cache.hasKey(self.getCacheKey())
+
+ def downloadResponse(self):
+ """Returns a ResponseBody object from the server."""
+
+ #print "downloading from %s" % (self.path)
+ conn = httplib.HTTPSConnection(self.whatcd.site)
+ rb = ResponseBody()
+
+ if self.whatcd.isProxyEnabled():
+ conn = httplib.HTTPSConnection(host=self.whatcd.getProxy()[0], port=self.whatcd.getProxy()[1])
+ conn.request(method=self.type, url="https://" + self.whatcd.site + self.path, body=self.data, headers=self.headers)
+ else:
+ conn.request(self.type, self.path, self.data, self.headers)
+
+ response = conn.getresponse()
+ rb.headers = response.getheaders()
+        # Rip all inline JavaScript out of the response in case it hasn't been properly escaped
+        rb.body = re.sub(r'(?s)<script.*?</script>', '', response.read())
+ conn.close()
+ return rb
+
+ def execute(self, cacheable=False):
+ """Depending if caching is enabled, returns response from the server or, if available, the cached response"""
+ if self.whatcd.isCachingEnabled() and cacheable:
+ response = self.getCachedResponse()
+ else:
+ response = self.downloadResponse()
+
+ return response
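+
+# Hedged sketch of how the classes below drive Request; "whatcd" is assumed to be
+# an already authenticated WhatCD instance and the path is only an example:
+#
+#     req = Request(whatcd, "GET", "/index.php", "", whatcd.headers)
+#     resp = req.execute(cacheable=True)  # served from cache only if whatcd.isCachingEnabled()
+#     print resp.body[:200]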
+
+class Authenticate(WhatBase):
+
+ def __init__(self, whatcd):
+ """Create an authenticated user object.
+ # Parameters:
+ * whatcd object: WhatCD object.
+ """
+ self.whatcd = whatcd
+ self.parser = Parser(whatcd)
+ if not self.whatcd.isAuthenticated():
+ self.getAuthenticatedHeader()
+
+ def setCookie(self):
+ print "creating cookie"
+ f = open('cookie', 'w')
+        loginform = {'username': self.whatcd.username, 'password': self.whatcd.password,
+                     'keeplogged': '1', 'login': 'Login'}
+ data = urllib.urlencode(loginform)
+ response = self._request("POST", self.whatcd.loginpage, data, self.whatcd.headers).execute(True)
+ try:
+ cookie = dict(response.headers)['set-cookie']
+ session = re.search("session=[^;]+", cookie).group(0)
+ self.whatcd.headers["Cookie"] = session
+ homepage = response.body
+ pickle.dump(self.whatcd.headers, f)
+ except (KeyError, AttributeError):
+ print "Login failed, most likely bad creds or the site is down, nothing to do"
+ f.close()
+ os.remove('cookie')
+ self.whatcd.headers = None
+ quit()
+ f.close()
+
+
+ def getAuthenticatedHeader(self):
+ """
+ Log user in what.cd and returns the authenticated header
+ """
+ homepage = None
+ if os.path.exists("cookie"):
+ f = open("cookie", "r")
+ try:
+ self.whatcd.headers = pickle.load(f)
+ except EOFError:
+ f.close()
+ os.remove("cookie")
+ print "invalid cookie, removed"
+ self.setCookie()
+ else:
+ self.setCookie()
+ #set authenticated user info
+ if 'id' not in self.whatcd.authenticateduserinfo:
+ self.whatcd.authenticateduserinfo = self.getAuthenticatedUserInfo(homepage)
+
+ return self.whatcd.headers
+
+ def getAuthenticatedUserInfo(self, homepage=None):
+ """
+ Returns authenticated user's info
+ """
+ if not homepage:
+ homepage = BeautifulSoup(self._request("GET", "/index.php", "", self.whatcd.headers).execute(True).body)
+ authuserinfo = self._parser().authenticatedUserInfo(homepage.find("div", {"id": "userinfo"}))
+ return authuserinfo
+
+ def getAuthenticatedUserId(self):
+ """
+ Returns authenticated user's id
+ """
+ return self.whatcd.authenticateduserinfo["id"]
+
+ def getAuthenticatedUserAuthCode(self):
+ """
+ Returns authenticated user's authcode
+ """
+ return self.whatcd.authenticateduserinfo["authcode"]
+
+
+ def getAuthenticatedUserUpload(self):
+ """
+ Returns authenticated user's total uploaded data
+ """
+ return self.whatcd.authenticateduserinfo["uploaded"]
+
+
+ def getAuthenticatedUserDownload(self):
+ """
+ Returns authenticated user's total downloaded data
+ """
+ return self.whatcd.authenticateduserinfo["downloaded"]
+
+
+ def getAuthenticatedUserRatio(self):
+ """
+ Returns authenticated user's ratio
+ """
+ return self.whatcd.authenticateduserinfo["ratio"]
+
+ def getAuthenticatedUserRequiredRatio(self):
+ """
+ Returns authenticated user's required ratio
+ """
+ return self.whatcd.authenticateduserinfo["required"]
+
+
+class User(WhatBase):
+ """A What.CD user"""
+
+ def __init__(self, username, whatcd):
+ """Create an user object.
+ # Parameters:
+ * username str: The user's name.
+        * whatcd object: the what.cd network object
+ """
+ WhatBase.__init__(self, whatcd)
+ self.name = username
+ self.whatcd = whatcd
+ self.userpage = "/user.php?"
+ self.userid = None
+ self.userinfo = None
+
+ def getUserName(self):
+ """
+ Returns user's name
+ """
+        return self.name
+
+ def getUserId(self):
+ """
+        Returns user's id, None if the user doesn't exist
+ """
+ if self.userid:
+ return self.userid
+ else:
+ idform = {'action': "search", 'search': self.name}
+ data = urllib.urlencode(idform)
+            headers = dict(self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).headers)
+            if 'location' not in headers:
+                return None
+            else:
+                self.userid = headers['location'][12:]
+                return self.userid
+
+ def getInfo(self):
+ """
+ Returns a dictionary of {percentile:{dataup str,
+ datadown str,
+ overall str,
+                                          postsmade str,
+                                          bountyspent str,
+ reqfilled str,
+ artistsadded str,
+ torrentsup str},
+ stats: {uploaded str,
+ ratio str,
+ joined str,
+ downloaded str,
+ lastseen str,
+ rratio str},
+ community: {uploaded tuple(total str, url str),
+ forumposts tuple(total str, url str),
+ invited tuple (total,None),
+ perfectflacs tuple(total str, url str),
+ contributedcollages tuple(total str, url str),
+ reqvoted tuple(total str, url str),
+ uniquegroups tuple(total str, url str)
+ torrentscomments tuple(total str, url str),
+ snatched tuple(total str, url str),
+ artists str,
+ reqfilled tuple(total str, url str),
+ startedcollages tuple(total str, url str),
+ leeching tuple(total str, url str),
+ seeding tuple(total str, url str)}
+ }
+ If paranoia is not Off, it returns None.
+ """
+ if self.getUserId():
+ form = {'id': self.getUserId()}
+ data = urllib.urlencode(form)
+ userpage = BeautifulSoup(self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).body)
+ info = self._parser().userInfo(userpage.find("div", {"class": "sidebar"}), self.name)
+ self.userinfo = info
+ return info
+ else:
+ print "no user id retrieved"
+ return None
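+
+    # Hedged reading of the structure documented above; "someuser" is a placeholder
+    # and getInfo() may return None (e.g. when the user's paranoia is not Off):
+    #
+    #     info = whatcd.getUser("someuser").getInfo()
+    #     if info:
+    #         print info['stats']['ratio'], info['percentile']['overall']
+    #         print info['community']['seeding'][0]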
+
+
+ def getTorrentsSeeding(self, page=1):
+ """
+        Returns a list with all user's seeding music torrents
+        in form of dictionary {page(tuple with current and total), tag, dlurl, id,
+        artist (a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.),
+        album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.)}
+ """
+ if self.userid is None:
+ self.userid = self.getUserId()
+ url = "/torrents.php?type=seeding&userid=%s&page=%d" % (self.userid, page)
+ torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
+ return self._parser().torrentsList(torrentspage)
+
+ def getTorrentsSnatched(self, page=1):
+ """
+        Returns a list with all user's snatched music torrents
+        in form of dictionary {page(tuple with current and total), tag, dlurl, id,
+        artist (a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.),
+        album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.)}
+ """
+ if self.userid is None:
+ self.userid = self.getUserId()
+ url = "/torrents.php?type=snatched&userid=%s&page=%d" % (self.userid, page)
+ torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
+ return self._parser().torrentsList(torrentspage)
+
+ def getTorrentsUploaded(self, page=1):
+ """
+ Returns a list with all user's uploaded music torrents
+        in form of dictionary {page(tuple with current and total), tag, dlurl, id,
+        artist (a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.),
+        album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.)}
+ """
+ if self.userid is None:
+ self.userid = self.getUserId()
+ url = "/torrents.php?type=uploaded&userid=%s&page=%d" % (self.userid, page)
+ torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
+ return self._parser().torrentsList(torrentspage)
+
+
+ def getTorrentsCommented(self, page=1):
+ """
+ Returns a list with all user's commented torrents
+        in form of dictionary {postid, torrentid, comment, postdate, pages}
+
+ """
+ if self.userid is None:
+ self.userid = self.getUserId()
+
+ url = "/%s&page=%d" % (self.specificUserInfo().torrentscomments[1], page)
+ torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
+ return self._parser().postsList(torrentspage)
+
+
+
+ ###############################################
+ # specific values #
+ ###############################################
+
+
+ def specificUserInfo(self):
+ """
+        Returns specific attributes of user info; each attribute is None if the user's paranoia is on
+ """
+ info = SpecificInformation()
+ # Initialize attributes
+ info.joindate, info.lastseen, info.dataup, info.datadown,\
+ info.ratio, info.rratio, info.uppercentile, info.downpercentile,\
+ info.torrentsuppercentile, info.reqfilledpercentile, info.bountyspentpercentile,\
+ info.postsmadepercentile, info.artistsaddedpercentile, info.overallpercentile,\
+        info.postsmadecom, info.torrentscomments, info.collagesstartedcom, info.collagescontrcon,\
+ info.reqfilledcom, info.reqvotedcom, info.uploadedcom, info.uniquecom, info.perfectcom,\
+ info.seedingcom, info.leechingcom, info.snatchedcom, info.invitedcom, info.artistsaddedcom\
+ = (None, None, None, None, None, None, None, None, None, None, None, None, None, None,\
+ None, None, None, None, None, None, None, None, None, None, None, None, None, None)
+
+
+        # fetch the user info if we don't have it yet; if it cannot be retrieved
+        # (e.g. the user's paranoia is on), leave every attribute set to None
+        if not self.userinfo and self.getInfo() is None:
+            pass
+        else:
+ ######## stats ###########
+ info.joindate = self.userinfo['stats']['joined']
+ info.lastseen = self.userinfo['stats']['lastseen']
+ info.dataup = self.userinfo['stats']['uploaded']
+ info.datadown = self.userinfo['stats']['downloaded']
+ info.ratio = self.userinfo['stats']['ratio']
+ info.rratio = self.userinfo['stats']['rratio']
+ ######## percentile ###########
+ info.uppercentile = self.userinfo['percentile']['dataup']
+ info.downpercentile = self.userinfo['percentile']['datadown']
+ info.torrentsuppercentile = self.userinfo['percentile']['torrentsup']
+ info.reqfilledpercentile = self.userinfo['percentile']['reqfilled']
+ info.bountyspentpercentile = self.userinfo['percentile']['bountyspent']
+ info.postsmadepercentile = self.userinfo['percentile']['postsmade']
+ info.artistsaddedpercentile = self.userinfo['percentile']['artistsadded']
+ info.overallpercentile = self.userinfo['percentile']['overall']
+ ######## community ###########
+ info.postsmadecom = self.userinfo['community']['forumposts']
+ info.torrentscomments = self.userinfo['community']['torrentscomments']
+ info.collagesstartedcom = self.userinfo['community']['startedcollages']
+ info.collagescontrcon = self.userinfo['community']['contributedcollages']
+ info.reqfilledcom = self.userinfo['community']['reqfilled']
+ info.reqvotedcom = self.userinfo['community']['reqvoted']
+ info.uploadedcom = self.userinfo['community']['uploaded']
+ info.uniquecom = self.userinfo['community']['uniquegroups']
+ info.perfectcom = self.userinfo['community']['pefectflacs']
+ info.seedingcom = self.userinfo['community']['seeding']
+ info.leechingcom = self.userinfo['community']['leeching']
+ info.snatchedcom = self.userinfo['community']['snatched']
+ info.invitedcom = self.userinfo['community']['invited'][0]
+ info.artistsaddedcom = self.userinfo['community']['artists']
+
+
+
+ return info
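+
+    # Hedged example of the flat attribute access provided by specificUserInfo();
+    # every attribute stays None when the user's info could not be retrieved:
+    #
+    #     spec = whatcd.getUser("someuser").specificUserInfo()
+    #     print spec.ratio, spec.overallpercentile, spec.seedingcom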
+
+
+class Torrent(WhatBase):
+ """A What.CD torrent"""
+
+ def __init__(self, id, page, isparent, whatcd):
+ """Create a torrent object.
+ # Parameters:
+ * id str: The torrent's id.
+ * whatcd object: the WhatCD network object
+ * page: The torrent page's number [optional]
+ """
+ WhatBase.__init__(self, whatcd)
+ self.id = id
+ self.page = page
+ self.whatcd = whatcd
+ self.isParent = isparent
+ self.torrentpage = "/torrents.php?"
+ self.torrentinfo = self.getInfo()
+
+
+ def getTorrentUrl(self):
+ """
+ Returns torrent's URL
+ """
+ if self.isParent:
+ form = {'id': self.id, 'page':self.page}
+ data = urllib.urlencode(form)
+ return self.torrentpage + data
+ else:
+ form = {'torrentid': self.id, 'page':self.page}
+ data = urllib.urlencode(form)
+ headers = self._request("GET", self.torrentpage + data, "", self.whatcd.headers).execute(True).headers
+
+ if dict(headers) is None:
+ return None
+ else:
+ if 'location' not in dict(headers).keys():
+ return None
+ else:
+ return dict(headers)['location']
+
+
+ def getInfo(self):
+ """
+        Returns a dictionary with the torrent's info
+ """
+ if self.getTorrentUrl() is None:
+ print "no torrent retrieved with such id"
+ return None
+
+ torrentpage = BeautifulSoup(self._request("GET", "/" + self.getTorrentUrl(), "", self.whatcd.headers).execute(True).body)
+
+ if 'Site log' in torrentpage.find("title").string:
+ print "no torrent retrieved with such id"
+ return None
+ else:
+ return self._parser().torrentInfo(torrentpage, self.id, self.isParent)
+
+
+ def getTorrentParentId(self):
+ """
+ Returns torrent's group id
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['parentid']
+
+ def getTorrentDownloadURL(self):
+ """
+ Returns relative url to download the torrent
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['downloadurl']
+
+ def getTorrentDetails(self):
+ """
+ Returns torrent's details (format / bitrate)
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['details']
+
+ def getTorrentEditionInfo(self):
+ """
+ Returns torrent's edition info (Edition information / media type)
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['editioninfo']
+
+ def getTorrentMediaType(self):
+ """
+ Returns torrent's media type
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['rlsmedia']
+
+ def getTorrentSize(self):
+ """
+ Returns torrent's size
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['size']
+
+
+ def getTorrentSnatched(self):
+ """
+ Returns torrent's total snatches
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['snatched']
+
+
+ def getTorrentSeeders(self):
+ """
+ Returns torrent's current seeders
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['seeders']
+
+ def getTorrentLeechers(self):
+ """
+ Returns torrent's current leechers
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['leechers']
+
+ def getTorrentUploadedBy(self):
+ """
+ Returns torrent's uploader
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['uploadedby']
+
+ def getTorrentFolderName(self):
+ """
+ Returns torrent's folder name
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['foldername']
+
+ def getTorrentFileList(self):
+ """
+ Returns torrent's file list
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['filelist']
+
+
+ def getTorrentReleaseType(self):
+ """
+ Returns torrent's release type
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['rlstype']
+
+ def getTorrentDescription(self):
+ """
+        Returns torrent's description / empty string if there's none
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['torrentdescription']
+
+ def getTorrentComments(self):
+ """
+        Returns a list of dictionaries with each comment in the torrent page
+ {postid,post,userid,username}
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['comments']
+
+ def getTorrentCommentsPagesNumber(self):
+ """
+ Returns number of pages of comments in the torrent
+ """
+ if self.torrentinfo:
+            return self.torrentinfo['torrent']['commentspages']
+
+ def isTorrentFreeLeech(self):
+ """
+        Returns True if torrent is freeleech, False if not
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['isfreeleech']
+
+ def isTorrentReported(self):
+ """
+ Returns True if torrent is reported, False if not
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['isreported']
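+
+    # Hedged usage sketch for this class; the torrent id is a placeholder and
+    # each getter returns None when no torrent could be retrieved for that id:
+    #
+    #     torrent = whatcd.getTorrent("123456")
+    #     print torrent.getTorrentSize(), torrent.getTorrentSeeders()
+    #     print torrent.getTorrentFileList()
+    #     print torrent.isTorrentFreeLeech()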
+
+
+class Artist(WhatBase):
+ """A What.CD artist"""
+
+ def __init__(self, name, whatcd):
+ """Create an artist object.
+ # Parameters:
+ * name str: The artist's name.
+ * whatcd object: The WhatCD network object
+ """
+ WhatBase.__init__(self, whatcd)
+ self.name = name
+ self.whatcd = whatcd
+ self.artistpage = "/artist.php"
+ self.utils = Utils()
+ self.info = self.getInfo()
+
+
+ def getArtistName(self):
+ """
+ Returns artist's name
+ """
+ return self.name
+
+ def getArtistId(self):
+ """
+        Returns artist's id, None if the artist is not found
+ """
+ form = {'artistname': self.name}
+ data = urllib.urlencode(form)
+ headers = self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).headers
+ if dict(headers)['location'][0:14] != 'artist.php?id=':
+ return None
+ else:
+ return dict(headers)['location'][14:]
+
+ def getInfo(self):
+ """
+ Returns artist's info, None if there isn't
+ """
+ if self.getArtistId():
+ form = {'id': self.getArtistId()}
+ data = urllib.urlencode(form)
+ artistpage = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body)
+ return self._parser().artistInfo(artistpage)
+ else:
+ print "no artist info retrieved"
+ return None
+
+ def getArtistReleases(self):
+ """
+ Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id}
+ """
+ return self.info['releases']
+
+ def getArtistImage(self):
+ """
+ Return the artist image URL, None if there's no image
+ """
+ return self.info['image']
+
+ def getArtistInfo(self):
+ """
+ Return the artist's info, blank string if none
+ """
+ return self.info['info']
+
+ def getArtistTags(self):
+ """
+ Return a list with artist's tags
+ """
+ return self.info['tags']
+
+ def getArtistSimilar(self):
+ """
+ Return a list with artist's similar artists
+ """
+ return self.info['similarartists']
+
+ def getArtistRequests(self):
+ """
+ Returns a list with all artist's requests in form of dictionary {requestname, id}
+ """
+ return self.info['requests']
+
+ def setArtistInfo(self, id, info):
+ """
+ Updates what.cd artist's info and image
+        Returns 1 if artist info updated successfully, 0 if not.
+ # Parameters:
+ * id str: what.cd artist's id
+ * info tuple: (The artist's info -str-, image url -str- (None if there isn't))
+ """
+ if info[0]:
+ params = {'action': 'edit', 'artistid':id}
+ data = urllib.urlencode(params)
+
+ edit_page = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body)
+ what_form = self._parser().whatForm(edit_page, 'edit')
+ if info[1]:
+ image_to_post = info[1]
+ else:
+ image_to_post = what_form['image']
+ data_to_post = {'body': info[0].encode('utf-8'),
+ 'summary':'automated artist info insertion',\
+ 'image':image_to_post,\
+ 'artistid':what_form['artistid'],\
+ 'auth':what_form['auth'],\
+ 'action':what_form['action']}
+
+ #post artist's info
+ self.whatcd.headers['Content-type'] = "application/x-www-form-urlencoded"
+ response = self._request("POST", self.artistpage, urllib.urlencode(data_to_post), self.whatcd.headers).execute(False)
+ artist_id_returned = dict(response.headers)['location'][14:]
+
+ if str(artist_id_returned) == str(what_form['artistid']):
+ return 1
+ else:
+ return 0
+
+        else:
+            print "no artist info provided. Aborting."
+            return 0
+
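+# Hedged sketch for the Artist helpers above; the artist name, id and info text
+# are placeholders, and setArtistInfo() performs a real edit so it needs write access:
+#
+#     artist = whatcd.getArtist("Some Artist")
+#     print artist.getArtistReleases()
+#     print artist.getArtistSimilar()
+#     artist.setArtistInfo("12345", ("Artist biography text", None))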
+
+class Parser(object):
+
+ def __init__(self, whatcd):
+ self.utils = Utils()
+ self.whatcd = whatcd
+ self.totalpages = 0
+
+ def authenticatedUserInfo(self, dom):
+ """
+        Parse the index page and return a dictionary with basic authenticated user information
+ """
+ userInfo = {}
+ soup = BeautifulSoup(str(dom))
+ for ul in soup.fetch('ul'):
+ if ul["id"] == "userinfo_username":
+                #retrieve the logged-in user's id
+                hrefid = ul.findAll('li')[0].find("a")["href"]
+                regid = re.compile('[0-9]+')
+                if regid.search(hrefid) is None:
+                    print "not found href to retrieve user id"
+                else:
+                    userInfo["id"] = regid.search(hrefid).group(0)
+                    print "User id: %s" % userInfo["id"]
+
+                #retrieve the logged-in user's authcode
+                hrefauth = ul.findAll('li')[2].find("a")["href"]
+                regauth = re.compile('=[0-9a-zA-Z]+')
+                if regauth.search(hrefauth) is None:
+                    print "not found href to retrieve user authcode"
+ else:
+ userInfo["authcode"] = regauth.search(hrefauth).group(0)[1:]
+
+ elif ul["id"] == "userinfo_stats":
+ if len(ul.findAll('li')) > 0:
+ userInfo["uploaded"] = ul.findAll('li')[0].find("span").string
+ userInfo["downloaded"] = ul.findAll('li')[1].find("span").string
+ userInfo["ratio"] = ul.findAll('li')[2].findAll("span")[1].string
+ userInfo["required"] = ul.findAll('li')[3].find("span").string
+ userInfo["authenticate"] = True
+
+ return userInfo
+
+ def userInfo(self, dom, user):
+ """
+        Parse a user's page and return a dictionary with its information
+
+ # Parameters:
+ * dom str: user page html
+ * user str: what.cd username
+ """
+ userInfo = {'stats':{}, 'percentile':{}, 'community':{}}
+ soup = BeautifulSoup(str(dom))
+
+ for div in soup.fetch('div', {'class':'box'}):
+
+            #if paranoia is not set to 'Off', stop collecting data
+ if div.findAll('div')[0].string == "Personal":
+ if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off":
+ return None
+
+ statscontainer = soup.findAll('div', {'class':'box'})[1]
+ percentilecontainer = soup.findAll('div', {'class':'box'})[2]
+ communitycontainer = soup.findAll('div', {'class':'box'})[4]
+
+
+ userInfo['stats']['joined'] = statscontainer.findAll('li')[0].find('span')['title']
+ userInfo['stats']['lastseen'] = statscontainer.findAll('li')[1].find('span')['title']
+ userInfo['stats']['uploaded'] = statscontainer.findAll('li')[2].string[10:]
+ userInfo['stats']['downloaded'] = statscontainer.findAll('li')[3].string[12:]
+ userInfo['stats']['ratio'] = statscontainer.findAll('li')[4].find('span').string
+ userInfo['stats']['rratio'] = statscontainer.findAll('li')[5].string[16:]
+ userInfo['percentile']['dataup'] = percentilecontainer.findAll('li')[0].string[15:]
+ userInfo['percentile']['datadown'] = percentilecontainer.findAll('li')[1].string[17:]
+ userInfo['percentile']['torrentsup'] = percentilecontainer.findAll('li')[2].string[19:]
+ userInfo['percentile']['reqfilled'] = percentilecontainer.findAll('li')[3].string[17:]
+ userInfo['percentile']['bountyspent'] = percentilecontainer.findAll('li')[4].string[14:]
+ userInfo['percentile']['postsmade'] = percentilecontainer.findAll('li')[5].string[12:]
+ userInfo['percentile']['artistsadded'] = percentilecontainer.findAll('li')[6].string[15:]
+ userInfo['percentile']['overall'] = percentilecontainer.findAll('li')[7].find('strong').string[14:]
+
+ userInfo['community']['forumposts'] = (communitycontainer.findAll('li')[0].contents[0].string[13:len(communitycontainer.findAll('li')[0].contents[0].string)-2],\
+ communitycontainer.findAll('li')[0].find('a')['href'])
+ userInfo['community']['torrentscomments'] = (communitycontainer.findAll('li')[1].contents[0].string[18:len(communitycontainer.findAll('li')[1].contents[0].string)-2],\
+ communitycontainer.findAll('li')[1].find('a')['href'])
+ userInfo['community']['startedcollages'] = (communitycontainer.findAll('li')[2].contents[0].string[18:len(communitycontainer.findAll('li')[2].contents[0].string)-2],\
+ communitycontainer.findAll('li')[2].find('a')['href'])
+ userInfo['community']['contributedcollages'] = (communitycontainer.findAll('li')[3].contents[0].string[25:len(communitycontainer.findAll('li')[3].contents[0].string)-2],\
+ communitycontainer.findAll('li')[3].find('a')['href'])
+ userInfo['community']['reqfilled'] = (communitycontainer.findAll('li')[4].contents[0].string[17:len(communitycontainer.findAll('li')[4].contents[0].string)-2],\
+ communitycontainer.findAll('li')[4].find('a')['href'])
+ userInfo['community']['reqvoted'] = (communitycontainer.findAll('li')[5].contents[0].string[16:len(communitycontainer.findAll('li')[5].contents[0].string)-2],\
+ communitycontainer.findAll('li')[5].find('a')['href'])
+ userInfo['community']['uploaded'] = (communitycontainer.findAll('li')[6].contents[0].string[10:len(communitycontainer.findAll('li')[6].contents[0].string)-2],\
+ communitycontainer.findAll('li')[6].find('a')['href'])
+ userInfo['community']['uniquegroups'] = (communitycontainer.findAll('li')[7].contents[0].string[15:len(communitycontainer.findAll('li')[7].contents[0].string)-2],\
+ communitycontainer.findAll('li')[7].find('a')['href'])
+ userInfo['community']['pefectflacs'] = (communitycontainer.findAll('li')[8].contents[0].string[16:len(communitycontainer.findAll('li')[8].contents[0].string)-2],\
+ communitycontainer.findAll('li')[8].find('a')['href'])
+ userInfo['community']['seeding'] = (communitycontainer.findAll('li')[9].contents[0].string[9:len(communitycontainer.findAll('li')[9].contents[0].string)-2],\
+ communitycontainer.findAll('li')[9].find('a')['href'])
+ userInfo['community']['leeching'] = (communitycontainer.findAll('li')[10].contents[0].string[10:len(communitycontainer.findAll('li')[10].contents[0].string)-2],\
+ communitycontainer.findAll('li')[10].find('a')['href'])
+ #NB: there's a carriage return and white spaces inside the snatched li tag
+ userInfo['community']['snatched'] = (communitycontainer.findAll('li')[11].contents[0].string[10:len(communitycontainer.findAll('li')[11].contents[0].string)-7],\
+ communitycontainer.findAll('li')[11].find('a')['href'])
+ userInfo['community']['invited'] = (communitycontainer.findAll('li')[12].contents[0].string[9:],\
+ None)
+ userInfo['community']['artists'] = percentilecontainer.findAll('li')[6]['title']
+
+ return userInfo
+
+ def torrentInfo(self, dom, id, isparent):
+ """
+        Parse a torrent's page and return a dictionary with its information
+ """
+
+ torrentInfo = {'torrent':{}}
+ torrentfiles = []
+ torrentdescription = ""
+ isreported = False
+ isfreeleech = False
+ soup = BeautifulSoup(str(dom))
+ if isparent:
+ torrentInfo['torrent']['parentid'] = id
+ else:
+ groupidurl = soup.findAll('div', {'class':'linkbox'})[0].find('a')['href']
+ torrentInfo['torrent']['editioninfo'] = soup.findAll('td', {'class':'edition_info'})[0].find('strong').contents[-1]
+ regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray')
+ torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0)
+ torrentInfo['torrent']['parentid'] = groupidurl[groupidurl.rfind("=") + 1:]
+ torrentInfo['torrent']['downloadurl'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a', {'title':'Download'})[0]['href']
+ ## is freeleech or/and reported? ##
+ #both
+ if len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 4:
+ isreported = True
+ isfreeleech = True
+ torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0]
+ #either
+ elif len(soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents) == 2:
+ if soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Reported':
+ isreported = True
+ elif soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[1].string == 'Freeleech!':
+                isfreeleech = True
+ torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0]
+ #none
+ else:
+ torrentInfo['torrent']['details'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('a')[-1].contents[0]
+ torrentInfo['torrent']['isfreeleech'] = isfreeleech
+ torrentInfo['torrent']['isreported'] = isreported
+ torrentInfo['torrent']['size'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[1].string
+ torrentInfo['torrent']['snatched'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[2].string
+ torrentInfo['torrent']['seeders'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[3].string
+ torrentInfo['torrent']['leechers'] = soup.findAll('tr', {'id':'torrent%s' % id})[0].findAll('td')[4].string
+ torrentInfo['torrent']['uploadedby'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('a')[0].string
+ foldername = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('div')[1].string
+ if(foldername is None):
+ torrentInfo['torrent']['foldername'] = None
+ else:
+ torrentInfo['torrent']['foldername'] = self.utils.decodeHTMLEntities(foldername)
+ files = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('tr')
+ for file in files[1:-1]:
+ torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string))
+ torrentInfo['torrent']['filelist'] = torrentfiles
+ #is there any description?
+ if len(soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')) > 1:
+ description = torrentInfo['torrent']['description'] = soup.findAll('tr', {'id':'torrent_%s' % id})[0].findAll('blockquote')[1].contents
+ info = ''
+ for content in description:
+ if content.string:
+ info = "%s%s" % (info, self.utils._string(content.string))
+ torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string))
+ torrentInfo['torrent']['torrentdescription'] = torrentdescription
+ regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown')
+ torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0)
+
+ torrentInfo['torrent']['comments'] = []
+ torrentInfo['torrent']['commentspages'] = 0
+
+ if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0:
+ linkbox = dom.findAll("div", {"class": "linkbox"})[-1]
+ pages = 1
+ postid = ''
+ userid = ''
+ post = ''
+ # if there's more than 1 page of torrents
+ if linkbox.find("a"):
+ # by default torrent page show last page of comments
+ lastpage = linkbox.findAll("a")[-1]['href']
+ pages = int(lastpage[18:lastpage.find('&')]) + 1
+ for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}):
+ postid = comment.find("a", {"class":"post_id"}).string[1:]
+ userid = comment.findAll("a")[1]['href'][12:]
+ username = comment.findAll("a")[1].string
+ post = comment.find("div", {"id":"content" + postid})
+ post = u''.join([post.string for post in post.findAll(text=True)])
+ torrentInfo['torrent']['comments'].append({"postid":postid, "post":post, "userid":userid, "username":username})
+
+ torrentInfo['torrent']['commentspages'] = pages
+
+ return torrentInfo
+
+ def artistInfo(self, dom):
+ """
+        Parse an artist's page and return a dictionary with its information
+ """
+ artistInfo = {}
+ releases = []
+ requests = []
+ infoartist = ""
+ tagsartist = []
+ similarartists = []
+ soup = BeautifulSoup(str(dom))
+ for releasetype in soup.fetch('table', {'class':'torrent_table'}):
+ releasetypenames = releasetype.findAll('strong')
+ releasetypename = releasetype.findAll('strong')[0].string
+ for release in releasetypenames[1:-1]:
+ #skip release edition info and Freeleech! s
+ if len(release.parent.contents) > 1 and len(release.contents) > 1:
+ releaseyear = release.contents[0][0:4]
+ releasename = release.contents[1].string
+ releasehref = release.contents[1]['href']
+ releaseid = releasehref[releasehref.rfind('=') + 1:]
+ releases.append({'releasetype':releasetypename,\
+ 'year': releaseyear, 'name':self.utils.decodeHTMLEntities(releasename), 'id':releaseid})
+
+ artistInfo['releases'] = releases
+ #is there an artist image?
+ artistInfo['image'] = None
+ if soup.find('div', {'class':'box'}).find('img'):
+ artistInfo['image'] = soup.find('div', {'class':'box'}).find('img')['src']
+ #is there any artist info?
+ contents = soup.find('div', {'class':'body'}).contents
+ if len(contents) > 0:
+ for content in contents:
+ if content.string:
+ infoartist = "%s%s" % (infoartist, self.utils._string(content.string))
+ artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist)
+ #is there any artist tags?
+ if soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li'):
+ ul = soup.findAll('ul', {'class':'stats nobullet'})[0].findAll('li')
+ for li in ul:
+ if li.contents[0].string:
+ tagsartist.append(self.utils._string(li.contents[0].string))
+ artistInfo['tags'] = tagsartist
+ #is there any similar artist?
+ if soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'}):
+ artists = soup.findAll('ul', {'class':'stats nobullet'})[2].findAll('span', {'title':'2'})
+ for artist in artists:
+ if artist.contents[0].string:
+ similarartists.append(self.utils._string(artist.contents[0].string))
+ artistInfo['similarartists'] = similarartists
+ #is there any request?
+ if soup.find('table', {'id':'requests'}):
+ for request in soup.find('table', {'id':'requests'}).findAll('tr', {'class':re.compile('row')}):
+ requests.append({'requestname':request.findAll('a')[1].string, 'id':request.findAll('a')[1]['href'][28:]})
+
+ artistInfo['requests'] = requests
+
+ return artistInfo
+
+ def torrentsList(self, dom):
+ """
+        Parse a torrents list page and return a list of dictionaries with their information
+ """
+ torrentslist = []
+ torrentssoup = dom.find("table", {"width": "100%"})
+ pages = 0
+
+ #if there's at least 1 torrent in the list
+ if torrentssoup:
+ navsoup = dom.find("div", {"class": "linkbox"})
+ pages = 1
+            regyear = re.compile(r'\[\d{4}\]')
+
+ #is there a page navigation bar?
+ if navsoup.contents:
+ #if there's more than 1 page of torrents
+ if navsoup.contents[-1].has_key('href'):
+ lastpage = navsoup.contents[-1]['href']
+ pages = lastpage[18:lastpage.find('&')]
+ self.totalpages = pages
+ else: #we are at the last page, no href
+ pages = self.totalpages + 1
+ #fetch all tr except first one (column head)
+ for torrent in torrentssoup.fetch('tr')[1:]:
+ #exclude non music torrents
+ if torrent.find('td').find('div')['class'][0:10] == 'cats_music':
+
+ torrenttag = torrent.find('td').contents[1]['title']
+ torrentdl = torrent.findAll('td')[1].find('span').findAll('a')[0]['href']
+ torrentrm = torrent.findAll('td')[1].find('span').findAll('a')[1]['href']
+ torrentid = torrentrm[torrentrm.rfind('=') + 1:]
+ torrenttd = torrent.findAll('td')[1]
+
+ # remove dataless elements
+ torrenttags = torrenttd.div
+ rightlinks = torrenttd.span
+ torrenttags.extract()
+ rightlinks.extract()
+
+ # remove line breaks
+ torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")])
+ torrenttd = BeautifulSoup(torrenttd)
+ isScene = False
+ info = ""
+
+ if len(torrenttd.findAll('a')) == 2:
+ #one artist
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.find("a").string),)
+ artistid = (torrenttd.find("a")['href'][14:],)
+ torrentalbum = torrenttd.findAll("a")[1].string
+ info = torrenttd.findAll("a")[1].nextSibling.string.strip()
+
+
+ elif len(torrenttd.findAll('a')) == 1:
+ #various artists
+ torrentartist = ('Various Artists',)
+ artistid = ()
+ torrentalbum = torrenttd.find("a").string
+ info = torrenttd.find("a").nextSibling.string.strip()
+
+ elif len(torrenttd.findAll('a')) == 3:
+ #two artists
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[0].string),\
+ self.utils.decodeHTMLEntities(torrenttd.findAll("a")[1].string))
+ artistid = (torrenttd.findAll("a")[0]['href'][14:],\
+ torrenttd.findAll("a")[1]['href'][14:])
+ torrentalbum = torrenttd.findAll("a")[2].string
+ info = torrenttd.findAll("a")[2].nextSibling.string.strip()
+
+ elif torrenttd.find(text=re.compile('performed by')):
+ #performed by
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[-2].string),)
+ artistid = (torrenttd.findAll("a")[-2]['href'][14:],)
+ torrentalbum = torrenttd.findAll("a")[-1].string
+ info = torrenttd.findAll("a")[-1].nextSibling.string.strip()
+
+ if 'Scene' in info:
+ isScene = True
+
+ torrentyear = regyear.search(info).group(0)[1:5]
+ torrentslist.append({'tag':torrenttag,\
+ 'dlurl':torrentdl,\
+ 'id':torrentid,\
+ 'artist':torrentartist,\
+ 'artistid':artistid,\
+ 'album':self.utils.decodeHTMLEntities(torrentalbum),
+ 'year':torrentyear,
+ 'pages':pages,
+ 'scene':isScene})
+
+ return torrentslist
+
+ def postsList(self, dom):
+ """
+        Parse a posts list page and return a list of dictionaries, one per post:
+        {postid, torrentid, comment, postdate, pages}
+ """
+ postslist = []
+ postssoup = dom.find("div", {"class": "thin"})
+ pages = 0
+
+ #if there's at least 1 post in the list
+ if postssoup:
+ navsoup = dom.find("div", {"class": "linkbox"})
+
+ #if there's more than 1 page of torrents
+ if navsoup.find("a"):
+ lastpage = navsoup.findAll("a")[1]['href']
+ pages = lastpage[18:lastpage.find('&')]
+ self.totalpages = pages
+ else: #we are at the last page, no link
+ pages = 1
+
+ for post in postssoup.fetch('table', {'class':'forum_post box vertical_margin'}):
+ commentbody = post.find("td", {"class":"body"})
+ postid = post.find("span").findAll("a")[0].string[1:]
+ torrentid = post.find("span").findAll("a")[-1]['href'][post.find("span").findAll("a")[-1]['href'].rfind('=') + 1:]
+ comment = u''.join([commentbody.string for commentbody in commentbody.findAll(text=True)])
+ postdate = post.find("span", {"class":"time"})['title']
+ postslist.append({'postid':postid,\
+ 'torrentid':torrentid,\
+ 'comment':comment,\
+ 'postdate':postdate,\
+ 'pages':pages})
+
+
+ return postslist
+
+
+ def whatForm(self, dom, action):
+ """
+        Parse a what.cd edit page and return a dict with all form input/textarea names and values
+        # Parameters:
+            * dom str: the edit page dom.
+            * action str: the action value from the requested form
+ """
+ inputs = {}
+
+ form = dom.find('input', {'name':'action', 'value':action}).parent
+ elements = form.fetch(('input', 'textarea'))
+ #get all form elements except for submit input
+ for element in elements[0:3]:
+ name = element.get('name', None)
+ if element.name == 'textarea':
+ inputs[name] = element.string
+ else:
+ inputs[name] = element.get('value', None)
+ return inputs
+
+
+
+if __name__ == "__main__":
+ print "Module to manage what.cd as a web service"