diff --git a/headphones/__init__.py b/headphones/__init__.py
index 761e57db..bf9738b9 100644
--- a/headphones/__init__.py
+++ b/headphones/__init__.py
@@ -155,8 +155,8 @@ WAFFLES = None
WAFFLES_UID = None
WAFFLES_PASSKEY = None
WHATCD = None
-WHATCD_UID = None
-WHATCD_PASSKEY = None
+WHATCD_USERNAME = None
+WHATCD_PASSWORD = None
DOWNLOAD_TORRENT_DIR = None
INTERFACE = None
@@ -251,7 +251,7 @@ def initialize():
LOSSLESS_DESTINATION_DIR, PREFERRED_QUALITY, PREFERRED_BITRATE, DETECT_BITRATE, ADD_ARTISTS, CORRECT_METADATA, MOVE_FILES, \
RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, \
ADD_ALBUM_ART, EMBED_ALBUM_ART, EMBED_LYRICS, DOWNLOAD_DIR, BLACKHOLE, BLACKHOLE_DIR, USENET_RETENTION, SEARCH_INTERVAL, \
- TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, WHATCD, WHATCD_UID, WHATCD_PASSKEY, DOWNLOAD_TORRENT_DIR, \
+ TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, WHATCD, WHATCD_USERNAME, WHATCD_PASSWORD, DOWNLOAD_TORRENT_DIR, \
LIBRARYSCAN_INTERVAL, DOWNLOAD_SCAN_INTERVAL, SAB_HOST, SAB_USERNAME, SAB_PASSWORD, SAB_APIKEY, SAB_CATEGORY, \
NZBMATRIX, NZBMATRIX_USERNAME, NZBMATRIX_APIKEY, NEWZNAB, NEWZNAB_HOST, NEWZNAB_APIKEY, NEWZNAB_ENABLED, EXTRA_NEWZNABS,\
NZBSORG, NZBSORG_UID, NZBSORG_HASH, NEWZBIN, NEWZBIN_UID, NEWZBIN_PASSWORD, LASTFM_USERNAME, INTERFACE, FOLDER_PERMISSIONS, \
@@ -348,8 +348,8 @@ def initialize():
WAFFLES_PASSKEY = check_setting_str(CFG, 'Waffles', 'waffles_passkey', '')
WHATCD = bool(check_setting_int(CFG, 'What.cd', 'whatcd', 0))
- WHATCD_UID = check_setting_str(CFG, 'What.cd', 'whatcd_uid', '')
- WHATCD_PASSKEY = check_setting_str(CFG, 'What.cd', 'whatcd_passkey', '')
+ WHATCD_USERNAME = check_setting_str(CFG, 'What.cd', 'whatcd_username', '')
+ WHATCD_PASSWORD = check_setting_str(CFG, 'What.cd', 'whatcd_password', '')
SAB_HOST = check_setting_str(CFG, 'SABnzbd', 'sab_host', '')
SAB_USERNAME = check_setting_str(CFG, 'SABnzbd', 'sab_username', '')
@@ -631,8 +631,8 @@ def config_write():
new_config['What.cd'] = {}
new_config['What.cd']['whatcd'] = int(WHATCD)
- new_config['What.cd']['whatcd_uid'] = WHATCD_UID
- new_config['What.cd']['whatcd_passkey'] = WHATCD_PASSKEY
+ new_config['What.cd']['whatcd_username'] = WHATCD_USERNAME
+ new_config['What.cd']['whatcd_password'] = WHATCD_PASSWORD
new_config['General']['search_interval'] = SEARCH_INTERVAL
new_config['General']['libraryscan_interval'] = LIBRARYSCAN_INTERVAL
diff --git a/headphones/searcher.py b/headphones/searcher.py
index ec5c86ea..d54d6d76 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -15,6 +15,7 @@
import urllib, urllib2, urlparse
import lib.feedparser as feedparser
+import lib.whatapi as whatapi
from xml.dom import minidom
from xml.parsers.expat import ExpatError
from StringIO import StringIO
@@ -808,69 +809,70 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
if headphones.WHATCD:
provider = "What.cd"
- providerurl = url_fix("https://www.what.cd/browse.php")
bitrate = None
if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
- format = "FLAC"
- bitrate = "(Lossless)"
+ format_regex = "FLAC"
maxsize = 10000000000
elif headphones.PREFERRED_QUALITY:
- format = "FLAC OR MP3"
+ format_regex = "(FLAC|MP3)"
maxsize = 10000000000
else:
- format = "MP3"
+ format_regex = "MP3"
maxsize = 300000000
- query_items = ['artist:"%s"' % artistterm,
- 'album:"%s"' % albumterm,
- 'format:(%s)' % format,
- 'size:[0 TO %d]' % maxsize,
- '-seeders:0'] # cut out dead torrents
- if bitrate:
- query_items.append('bitrate:"%s"' % bitrate)
-
- params = {
- "uid": headphones.WHATCD_UID,
- "passkey": headphones.WHATCD_PASSKEY,
- "rss": "1",
- "c0": "1",
- "s": "seeders", # sort by
- "d": "desc" # direction
- }
-
- searchURL = "%s?%s&q=%s" % (providerurl, urllib.urlencode(params), urllib.quote(" ".join(query_items)))
-
try:
- data = urllib2.urlopen(searchURL, timeout=20).read()
- except urllib2.URLError, e:
- logger.warn('Error fetching data from %s: %s' % (provider, e))
- data = False
+ whatcd = whatapi.getWhatcdNetwork(headphones.WHATCD_USERNAME, headphones.WHATCD_PASSWORD)
+ except:
+ whatcd = None
+ logger.warn("What.cd credentials incorrect or site is down.")
- if data:
+ if whatcd:
+ whatcd.enableCaching()
- d = feedparser.parse(data)
- if not len(d.entries):
- logger.info(u"No results found from %s for %s" % (provider, term))
- pass
+ artist = whatcd.getArtist(artistterm)
+ artist_id = artist.getArtistId()
+ else:
+ artist_id = None
- else:
- for item in d.entries:
- try:
- title_match = re.search(r"(.+)\[(.+)\]$", item.title)
- title = title_match.group(1).strip()
- details = title_match.group(2).split("-")
+ if artist_id: # will be None if artist not found
+ logger.info(u"What.cd artist ID: %s" % artist_id)
+ artist_releases = artist.getArtistReleases()
+ logger.info(u"Found %d releases on what.cd for %s" % (len(artist_releases), artistterm))
+ #Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id}
+ else:
+ artist_releases = []
- desc_match = re.search(r"Size: (\d+)<", item.description)
- size = desc_match.group(1)
+ possible_matches = [ release for release in artist_releases if albumterm in release['name'] ]
- url = item.link
+ # cap at 10 matches, 1 per second to reduce hits on API...don't wanna get in trouble.
+ # Might want to turn up number of matches later.
+ max_torrent_info_reads = 10
+ info_read_rate = 1
- resultlist.append((title, size, url, provider))
- logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
- except Exception, e:
- logger.error(u"An error occurred while trying to parse the response from What.cd: %s" % e)
+            # Fetch the torrent page of each candidate release, pausing between
+            # consecutive lookups to keep the request rate on the site low.
+            match_torrents = []
+            for i, release in enumerate(possible_matches[:max_torrent_info_reads]):
+                if i > 0:
+                    time.sleep(info_read_rate)
+                match_torrents.append(whatcd.getTorrent(release['id']))
+
+            # filter on format, size, and num seeders
+            # A Torrent whose page could not be retrieved has torrentinfo == None and
+            # all of its getters return None, so those are dropped before re.search()
+            # would be handed a None string.
+            # NOTE(review): assumes getTorrentSize()/getTorrentSeeders() return numbers
+            # comparable with maxsize/minimumseeders -- confirm against the parser.
+            match_torrents = [torrent for torrent in match_torrents
+                              if torrent.torrentinfo
+                              and re.search(format_regex, torrent.getTorrentDetails(), flags=re.I)
+                              and torrent.getTorrentSize() <= maxsize
+                              and torrent.getTorrentSeeders() >= minimumseeders]
+
+            if not match_torrents:
+                logger.info(u"No results found from %s for %s after filtering" % (provider, term))
+            else:
+                # best-seeded first, like the "s=seeders, d=desc" ordering of the
+                # RSS query this code replaces
+                match_torrents.sort(key=whatapi.Torrent.getTorrentSeeders, reverse=True)
+
+            for torrent in match_torrents:
+                resultlist.append((torrent.getTorrentFolderName(),
+                                   torrent.getTorrentSize(),
+                                   torrent.getTorrentDownloadURL(),
+                                   provider))
if headphones.ISOHUNT:
provider = "isoHunt"
diff --git a/headphones/webserve.py b/headphones/webserve.py
index 5a5cc31f..6e225166 100644
--- a/headphones/webserve.py
+++ b/headphones/webserve.py
@@ -541,21 +541,21 @@ class WebInterface(object):
return serve_template(templatename="config.html", title="Settings", config=config)
config.exposed = True
-
-
+
+
def configUpdate(self, http_host='0.0.0.0', http_username=None, http_port=8181, http_password=None, launch_browser=0, api_enabled=0, api_key=None,
download_scan_interval=None, nzb_search_interval=None, libraryscan_interval=None, sab_host=None, sab_username=None, sab_apikey=None, sab_password=None,
sab_category=None, download_dir=None, blackhole=0, blackhole_dir=None, usenet_retention=None, nzbmatrix=0, nzbmatrix_username=None, nzbmatrix_apikey=None,
newznab=0, newznab_host=None, newznab_apikey=None, newznab_enabled=0, nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, newzbin=0, newzbin_uid=None,
newzbin_password=None, preferred_quality=0, preferred_bitrate=None, detect_bitrate=0, move_files=0, torrentblackhole_dir=None, download_torrent_dir=None,
- numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, rename_files=0, correct_metadata=0,
- cleanup_files=0, add_album_art=0, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None, folder_format=None, file_format=None,
- include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, remix=0, spokenword=0, audiobook=0, autowant_upcoming=False, autowant_all=False,
- interface=None, log_dir=None, music_encoder=0, encoder=None, bitrate=None, samplingfrequency=None, encoderfolder=None, advancedencoder=None,
- encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0, delete_lossless_files=0, prowl_enabled=0, prowl_onsnatch=0,
- prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None, xbmc_update=0, xbmc_notify=0, nma_enabled=False,
- nma_apikey=None, nma_priority=0, nma_onsnatch=0, synoindex_enabled=False, mirror=None, customhost=None, customport=None, customsleep=None, hpuser=None, hppass=None,
- preferred_bitrate_high_buffer=None, preferred_bitrate_low_buffer=None, **kwargs):
+ numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, whatcd=0, whatcd_uid=None, whatcd_passkey=None,
+ rename_files=0, correct_metadata=0, cleanup_files=0, add_album_art=0, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None,
+ folder_format=None, file_format=None, include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, remix=0, spokenword=0, audiobook=0,
+ autowant_upcoming=False, autowant_all=False, interface=None, log_dir=None, music_encoder=0, encoder=None, bitrate=None, samplingfrequency=None,
+ encoderfolder=None, advancedencoder=None, encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0, delete_lossless_files=0,
+ prowl_enabled=0, prowl_onsnatch=0, prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None, xbmc_update=0,
+ xbmc_notify=0, nma_enabled=False, nma_apikey=None, nma_priority=0, nma_onsnatch=0, synoindex_enabled=False, mirror=None, customhost=None, customport=None,
+ customsleep=None, hpuser=None, hppass=None, preferred_bitrate_high_buffer=None, preferred_bitrate_low_buffer=None, **kwargs):
headphones.HTTP_HOST = http_host
headphones.HTTP_PORT = http_port
diff --git a/lib/whatapi.py b/lib/whatapi.py
new file mode 100755
index 00000000..bc6f4394
--- /dev/null
+++ b/lib/whatapi.py
@@ -0,0 +1,1442 @@
+# -*- coding: utf_8 -*-
+#################################################################################
+#
+# Name: whatapi.py
+#
+# Synopsis: Module to manage what.cd as a web service
+#
+# Description: See below list of the implemented webservices
+#
+# Copyright 2010 devilcius
+#
+# The Wide Open License (WOL)
+#
+# Permission to use, copy, modify, distribute and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice and this license appear in all source copies.
+# THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
+# ANY KIND. See http://www.dspguru.com/wide-open-license for more information.
+#
+#################################################################################
+
+
+__author__="devilcius"
+__date__ ="$Oct 23, 2010 11:21:12 PM$"
+
+
+import hashlib
+try:
+ from BeautifulSoup import BeautifulSoup
+except:
+ raise ImportError,"Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download"
+import httplib
+import os
+import pickle
+import re
+import urllib
+import shelve
+import tempfile
+from htmlentitydefs import name2codepoint as n2cp
+
+
+"""
+A list of the implemented webservices (from what.cd )
+=====================================
+
+# User
+
+ * user.getUserId
+ * user.getInfo
+
+ * user.getTorrentsSeeding
+ * user.getTorrentsSnatched
+ * user.getTorrentsUploaded
+ * user.getTorrentsCommented
+
+ * user.specificUserInfo
+ Attributes:
+ ######## stats ###########
+ -joindate
+ -lastseen
+ -dataup
+ -datadown
+ -ratio
+ -rratio
+ ######## percentile ###########
+ -uppercentile
+ -downpercentile
+ -torrentsuppercentile
+ -reqfilledpercentile
+ -bountyspentpercentile
+ -postsmadepercentile
+ -artistsaddedpercentile
+ -overallpercentile
+ ######## community ###########
+ -postsmade
+ -torrentscomments
+ -collagesstarted
+ -collagescontr
+ -reqfilled
+ -reqvoted
+ -uploaded
+ -unique
+ -perfect
+ -seeding
+ -leeching
+ -snatched
+ -invited
+ -artistsadded
+
+
+# Artist
+
+ * artist.getArtistReleases
+ * artist.getArtistImage
+ * artist.getArtistInfo
+ * artist.getArtistTags
+ * artist.getArtistSimilar
+ * artist.getArtistRequests
+
+ + artist.setArtistInfo
+
+
+# Torrent
+
+ * torrent.getTorrentParentId
+ * torrent.getTorrentDownloadURL
+ * torrent.getTorrentDetails
+ * torrent.getTorrentSize
+ * torrent.getTorrentSnatched
+ * torrent.getTorrentSeeders
+ * torrent.getTorrentLeechers
+ * torrent.getTorrentUploadedBy
+ * torrent.getTorrentFolderName
+ * torrent.getTorrentFileList
+ * torrent.getTorrentDescription
+ * torrent.getTorrentComments
+ * torrent.isTorrentFreeLeech
+ * torrent.isTorrentReported
+
+
+# Authenticate
+
+ * authenticate.getAuthenticatedUserId
+ * authenticate.getAuthenticatedUserAuthCode
+ * authenticate.getAuthenticatedUserDownload
+ * authenticate.getAuthenticatedUserUpload
+ * authenticate.getAuthenticatedUserRatio
+ * authenticate.getAuthenticatedUserRequiredRatio
+
+"""
+
+class ResponseBody:
+    """Bare attribute holder for a server response.
+
+    Request.downloadResponse() assigns `headers` and `body` on its instances.
+    """
+
+class SpecificInformation:
+    """Bare attribute holder; User.specificUserInfo() fills one in and returns it."""
+
+
+class WhatBase(object):
+    """Common base of the webservice objects; holds the WhatCD network object."""
+
+    whatcd = None
+
+    def __init__(self, whatcd):
+        """Keep a reference to `whatcd`, logging it in first when it is not authenticated."""
+        self.whatcd = whatcd
+        if self.whatcd.isAuthenticated():
+            return
+        # not logged in yet: authenticate now and keep the resulting headers
+        print("authenticating...")
+        authenticated = Authenticate(self.whatcd)
+        self.whatcd.headers = authenticated.getAuthenticatedHeader()
+
+    def _request(self, type, path, data, headers):
+        """Return a new (not yet executed) Request bound to this object's network."""
+        return Request(self.whatcd, type, path, data, headers)
+
+    def _parser(self):
+        """Return a new Parser bound to this object's network."""
+        return Parser(self.whatcd)
+
+    def utils(self):
+        """Return a new Utils helper."""
+        return Utils()
+
+
+class Utils():
+    """Stateless helpers: md5 hashing, text coercion and HTML entity decoding."""
+
+    # "&name;" and "&#digits;" style entities. Group 1 is "#" for numeric
+    # entities, group 2 is the entity body. Compiled once instead of per call.
+    _ENTITY_RE = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
+
+    def md5(self, text):
+        """Returns the md5 hex digest of `text` (coerced with _string() first)."""
+
+        h = hashlib.md5()
+        h.update(self._string(text))
+
+        return h.hexdigest()
+
+    def _unicode(self, text):
+        """Returns `text` as unicode; byte strings are decoded as UTF-8."""
+        if isinstance(text, unicode):
+            return text
+
+        if isinstance(text, (int, long)):
+            return unicode(text)
+
+        return unicode(text, "utf-8")
+
+    def _string(self, text):
+        """Returns `text` as a byte string; unicode is encoded as UTF-8."""
+        if isinstance(text, str):
+            return text
+
+        if isinstance(text, (int, long)):
+            return str(text)
+
+        return text.encode("utf-8")
+
+    def _number(self, string):
+        """
+        Converts a string to an int, or to a float when it is not a valid int literal.
+        Returns 0 if None or an empty string was passed.
+        Raises ValueError when the string is neither an int nor a float literal.
+        """
+        # covers both None and "" (the original checked "" a second time)
+        if not string:
+            return 0
+        try:
+            return int(string)
+        except ValueError:
+            return float(string)
+
+    def substituteEntity(self, match):
+        """
+        re.sub() callback for _ENTITY_RE: returns the character an entity stands
+        for, or the matched text unchanged when the entity cannot be resolved.
+        """
+        ent = match.group(2)
+        if match.group(1) == "#":
+            # Numeric entity. The pattern also lets through bodies such as "x41"
+            # (hex) or "12ab" (garbage); int() used to raise ValueError on those.
+            try:
+                if ent[:1] in ("x", "X"):
+                    return unichr(int(ent[1:], 16))
+                return unichr(int(ent))
+            except (ValueError, OverflowError):
+                return match.group()
+
+        cp = n2cp.get(ent)
+        if cp:
+            return unichr(cp)
+        return match.group()
+
+    def decodeHTMLEntities(self, string):
+        """Returns `string` with every resolvable HTML entity replaced by its character."""
+        return self._ENTITY_RE.sub(self.substituteEntity, string)
+
+
+
+class WhatCD(object):
+    """Connection settings and session state shared by all webservice objects."""
+
+    def __init__(self, username, password, site, loginpage, headers):
+        """
+        # Parameters:
+            * username str: what.cd user name
+            * password str: user's password
+            * site str: host name the requests are sent to
+            * loginpage str: path of the login form on that host
+            * headers dict: HTTP headers sent with every request
+        """
+        #credentials
+        self.username = username
+        self.password = password
+        self.site = site
+        self.loginpage = loginpage
+        self.headers = headers
+        # filled in by Authenticate; an "id" key marks the session as logged in
+        self.authenticateduserinfo = {}
+
+        self.cache_backend = None
+        self.proxy_enabled = False
+        self.proxy = None
+
+    def isAuthenticated(self):
+        """
+        Checks if we are authenticated in what.cd
+        """
+        return "id" in self.authenticateduserinfo
+
+    def getCredentials(self):
+        """
+        Returns an authenticated user credentials object
+        """
+        return Authenticate(self)
+
+    def getUser(self, username):
+        """
+        Returns an user object
+        """
+        return User(username, self)
+
+    def getTorrent(self, id, page=1):
+        """
+        Returns a torrent object for a single torrent id
+        """
+        return Torrent(id, page, None, self)
+
+    def getTorrentGroup(self, id, page=1):
+        """
+        Returns a torrent object with `id` treated as a parent (group) id
+        """
+        return Torrent(id, page, True, self)
+
+    def getArtist(self, name):
+        """
+        Returns an artist object
+        """
+        return Artist(name, self)
+
+    def enableProxy(self, host, port):
+        """Enable a default web proxy"""
+        self.proxy = [host, Utils()._number(port)]
+        self.proxy_enabled = True
+
+    def disableProxy(self):
+        """Disable using the web proxy"""
+        self.proxy_enabled = False
+
+    def isProxyEnabled(self):
+        """Returns True if a web proxy is enabled."""
+        return self.proxy_enabled
+
+    def getProxy(self):
+        """Returns proxy details as [host, port], or None if never configured."""
+        return self.proxy
+
+    def enableCaching(self, file_path = None):
+        """Enables caching request-wide for all cachable calls.
+        * file_path: A file path for the backend storage file. If
+        None set, a cache file is created inside a new temporary directory.
+        """
+        if not file_path:
+            # tempfile.mktemp() only returns a name, which leaves a window for
+            # another process to create that path first. A freshly created
+            # private directory closes it, and the shelf file is still created
+            # by shelve itself (it must not pre-exist as an empty file).
+            cache_dir = tempfile.mkdtemp(prefix="whatapi_tmp_")
+            file_path = os.path.join(cache_dir, "cache")
+
+        self.cache_backend = _ShelfCacheBackend(file_path)
+
+    def disableCaching(self):
+        """Disables all caching features."""
+        self.cache_backend = None
+
+    def isCachingEnabled(self):
+        """Returns True if caching is enabled."""
+
+        return self.cache_backend is not None
+
+    def getCacheBackend(self):
+        """Returns the cache backend, or None when caching is disabled."""
+
+        return self.cache_backend
+
+def getWhatcdNetwork(username = "", password = ""):
+    """
+    Builds the WhatCD object used to talk to what.cd. Only the settings are
+    stored here; no request is sent by this function.
+    # Parameters:
+        * username str: a username of a valid what.cd user
+        * password str: user's password
+    """
+    default_headers = {
+        "Content-type": "application/x-www-form-urlencoded",
+        'Accept-Charset': 'utf-8',
+        'User-Agent': "whatapi [devilcius]"
+    }
+    # positional order matches WhatCD.__init__: username, password, site, loginpage, headers
+    return WhatCD(username, password, "ssl.what.cd", "/login.php", default_headers)
+
+
+
+class _ShelfCacheBackend(object):
+    """Used as a backend for caching cacheable requests; values live in a shelve file."""
+
+    def __init__(self, file_path = None):
+        """Opens (creating it if needed) the shelf stored at `file_path`."""
+        self.shelf = shelve.open(file_path)
+
+    def getHTML(self, key):
+        """Returns the value stored under `key`; raises KeyError if absent."""
+        return self.shelf[key]
+
+    def setHTML(self, key, xml_string):
+        """Stores `xml_string` under `key`."""
+        self.shelf[key] = xml_string
+
+    def hasKey(self, key):
+        """Returns True if `key` is in the shelf."""
+        # membership test on the shelf itself; .keys() would first build a
+        # list of every stored key on each lookup
+        return key in self.shelf
+
+
+class Request(object):
+    """web service operation."""
+
+    # Inline <script>...</script> blocks. The original substitution used an
+    # empty pattern and therefore removed nothing.
+    _SCRIPT_RE = re.compile(r"<script\b[^>]*>.*?</script>", re.I | re.S)
+
+    def __init__(self, whatcd, type, path, data, headers):
+        """
+        # Parameters:
+            * whatcd object: WhatCD object supplying site, proxy and cache settings
+            * type str: HTTP method
+            * path str: request path on the site
+            * data str: request body
+            * headers dict: HTTP headers to send
+        """
+        self.whatcd = whatcd
+        self.utils = Utils()
+        self.type = type
+        self.path = path
+        self.data = data
+        self.headers = headers
+        # None when caching is disabled, so the attribute always exists
+        if whatcd.isCachingEnabled():
+            self.cache = whatcd.getCacheBackend()
+        else:
+            self.cache = None
+
+    def getCacheKey(self):
+        """The cache key is a md5 hash of request params."""
+
+        key = self.type + self.path + self.data
+        return Utils().md5(key)
+
+    def getCachedResponse(self):
+        """Returns the cached response, downloading and storing it first if needed."""
+
+        if self.cache is None:
+            # nothing to cache into; behave like an uncached request
+            return self.downloadResponse()
+
+        key = self.getCacheKey()
+        if not self.cache.hasKey(key):
+            self.cache.setHTML(key, self.downloadResponse())
+        return self.cache.getHTML(key)
+
+    def isCached(self):
+        """Returns True if the request is already in cache."""
+
+        if self.cache is None:
+            return False
+        return self.cache.hasKey(self.getCacheKey())
+
+    def downloadResponse(self):
+        """Returns a ResponseBody object from the server."""
+
+        if self.whatcd.isProxyEnabled():
+            # through a proxy the request line carries the absolute URL
+            proxy_host, proxy_port = self.whatcd.getProxy()
+            conn = httplib.HTTPSConnection(host = proxy_host, port = proxy_port)
+            url = "https://" + self.whatcd.site + self.path
+        else:
+            conn = httplib.HTTPSConnection(self.whatcd.site)
+            url = self.path
+
+        rb = ResponseBody()
+        try:
+            conn.request(self.type, url, self.data, self.headers)
+            response = conn.getresponse()
+            rb.headers = response.getheaders()
+            # Rip all inline JavaScript out of the response in case it hasn't been properly escaped
+            rb.body = self._SCRIPT_RE.sub('', response.read())
+        finally:
+            # close the connection even when the request or the read fails
+            conn.close()
+        return rb
+
+    def execute(self, cacheable = False):
+        """Depending if caching is enabled, returns response from the server or, if available, the cached response"""
+        if self.whatcd.isCachingEnabled() and cacheable:
+            response = self.getCachedResponse()
+        else:
+            response = self.downloadResponse()
+
+        return response
+
+class Authenticate(WhatBase):
+    """Logs a WhatCD object in and exposes the logged-in user's details."""
+
+    def __init__(self, whatcd):
+        """Create an authenticated user object.
+        # Parameters:
+            * whatcd object: WhatCD object.
+        """
+        # WhatBase.__init__ is not called here: it creates an Authenticate
+        # itself whenever the network is not authenticated yet.
+        self.whatcd = whatcd
+        self.parser = Parser(whatcd)
+        if not self.whatcd.isAuthenticated():
+            self.getAuthenticatedHeader()
+
+    def setCookie(self):
+        """
+        Posts the login form, stores the session cookie in the network headers
+        and pickles those headers to the file 'cookie' in the working directory.
+        Raises Exception when the response carries no session cookie; in that
+        case the network headers are set to None.
+        """
+        print("creating cookie")
+        loginform= {'username': self.whatcd.username, 'password': self.whatcd.password \
+                    , 'keeplogged': '1', 'login': 'Login'}
+        data = urllib.urlencode(loginform)
+        response = self._request("POST", self.whatcd.loginpage, data, self.whatcd.headers).execute(True)
+        try:
+            cookie = dict(response.headers)['set-cookie']
+            session = re.search("session=[^;]+", cookie).group(0)
+        except (KeyError, AttributeError):
+            # no usable session: make sure no stale cookie file is left behind
+            if os.path.exists('cookie'):
+                os.remove('cookie')
+            self.whatcd.headers = None
+            raise Exception("Login failed, most likely bad creds or the site is down, nothing to do")
+
+        self.whatcd.headers["Cookie"] = session
+        # The file is opened only once there is something to write, and is
+        # always closed again.
+        f = open('cookie', 'w')
+        try:
+            pickle.dump(self.whatcd.headers, f)
+        finally:
+            f.close()
+
+    def getAuthenticatedHeader(self):
+        """
+        Log user in what.cd and returns the authenticated header
+        """
+        homepage = None
+        if os.path.exists("cookie"):
+            # NOTE: pickle.load() is only safe because this module writes the
+            # file itself; never point it at a file from an untrusted source.
+            f = open("cookie", "r")
+            try:
+                try:
+                    self.whatcd.headers = pickle.load(f)
+                    valid = True
+                except (EOFError, pickle.UnpicklingError):
+                    valid = False
+            finally:
+                f.close()
+            if not valid:
+                # removed only after the handle is closed
+                os.remove("cookie")
+                print("invalid cookie, removed")
+                self.setCookie()
+        else:
+            self.setCookie()
+        #set authenticated user info
+        if 'id' not in self.whatcd.authenticateduserinfo:
+            self.whatcd.authenticateduserinfo = self.getAuthenticatedUserInfo(homepage)
+
+        return self.whatcd.headers
+
+    def getAuthenticatedUserInfo(self, homepage = None):
+        """
+        Returns authenticated user's info, parsed from the "userinfo" div of
+        `homepage` (the site index page is fetched when `homepage` is not given)
+        """
+        if not homepage:
+            homepage = BeautifulSoup(self._request("GET", "/index.php", "", self.whatcd.headers).execute(True).body)
+        authuserinfo = self._parser().authenticatedUserInfo(homepage.find("div", {"id": "userinfo"}))
+        return authuserinfo
+
+    def getAuthenticatedUserId(self):
+        """
+        Returns authenticated user's id
+        """
+        return self.whatcd.authenticateduserinfo["id"]
+
+    def getAuthenticatedUserAuthCode(self):
+        """
+        Returns authenticated user's authcode
+        """
+        return self.whatcd.authenticateduserinfo["authcode"]
+
+    def getAuthenticatedUserUpload(self):
+        """
+        Returns authenticated user's total uploaded data
+        """
+        return self.whatcd.authenticateduserinfo["uploaded"]
+
+    def getAuthenticatedUserDownload(self):
+        """
+        Returns authenticated user's total downloaded data
+        """
+        return self.whatcd.authenticateduserinfo["downloaded"]
+
+    def getAuthenticatedUserRatio(self):
+        """
+        Returns authenticated user's ratio
+        """
+        return self.whatcd.authenticateduserinfo["ratio"]
+
+    def getAuthenticatedUserRequiredRatio(self):
+        """
+        Returns authenticated user's required ratio
+        """
+        return self.whatcd.authenticateduserinfo["required"]
+
+
+class User(WhatBase):
+    """A What.CD user"""
+
+    # Attributes set to None on every object returned by specificUserInfo().
+    # "torrentscomments" is included because getTorrentsCommented() reads it.
+    _SPECIFIC_FIELDS = (
+        'joindate', 'lastseen', 'dataup', 'datadown', 'ratio', 'rratio',
+        'uppercentile', 'downpercentile', 'torrentsuppercentile',
+        'reqfilledpercentile', 'bountyspentpercentile', 'postsmadepercentile',
+        'artistsaddedpercentile', 'overallpercentile',
+        'postsmadecom', 'torrentscommentscom', 'torrentscomments',
+        'collagesstartedcom', 'collagescontrcon', 'reqfilledcom', 'reqvotedcom',
+        'uploadedcom', 'uniquecom', 'perfectcom', 'seedingcom', 'leechingcom',
+        'snatchedcom', 'invitedcom', 'artistsaddedcom',
+    )
+
+    def __init__(self, username, whatcd):
+        """Create an user object.
+        # Parameters:
+            * username str: The user's name.
+            * whatcd object: the what.cd network object
+        """
+        WhatBase.__init__(self, whatcd)
+        self.name = username
+        self.whatcd = whatcd
+        self.userpage = "/user.php?"
+        self.userid = None
+        self.userinfo = None
+
+    def getUserName(self):
+        """
+        Returns user's name
+        """
+        # the constructor stores the name as self.name; self.username never existed
+        return self.name
+
+    def getUserId(self):
+        """
+        Returns user's id, None if user doesn't exists
+        """
+        if self.userid:
+            return self.userid
+
+        data = urllib.urlencode({'action': "search", 'search': self.name})
+        headers = self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).headers
+        # dict(headers) is never None; what can be missing is the header itself
+        location = dict(headers).get('location')
+        if location is None:
+            return None
+        # drop the first 12 characters (presumably "user.php?id=" -- confirm)
+        self.userid = location[12:]
+        return self.userid
+
+    def getInfo(self):
+        """
+        Returns a dictionary of {percentile:{dataup str,
+                                            datadown str,
+                                            overall str,
+                                            postsmade str,
+                                            bountyspent str,
+                                            reqfilled str,
+                                            artistsadded str,
+                                            torrentsup str},
+                                stats: {uploaded str,
+                                        ratio str,
+                                        joined str,
+                                        downloaded str,
+                                        lastseen str,
+                                        rratio str},
+                                community: {uploaded tuple(total str, url str),
+                                            forumposts tuple(total str, url str),
+                                            invited tuple (total,None),
+                                            perfectflacs tuple(total str, url str),
+                                            contributedcollages tuple(total str, url str),
+                                            reqvoted tuple(total str, url str),
+                                            uniquegroups tuple(total str, url str)
+                                            torrentscomments tuple(total str, url str),
+                                            snatched tuple(total str, url str),
+                                            artists str,
+                                            reqfilled tuple(total str, url str),
+                                            startedcollages tuple(total str, url str),
+                                            leeching tuple(total str, url str),
+                                            seeding tuple(total str, url str)}
+                                }
+        If paranoia is not Off, it returns None.
+        """
+        userid = self.getUserId()
+        if not userid:
+            print("no user id retrieved")
+            return None
+
+        data = urllib.urlencode({'id': userid})
+        userpage = BeautifulSoup(self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).body)
+        info = self._parser().userInfo(userpage.find("div", {"class": "sidebar"}), self.name)
+        self.userinfo = info
+        return info
+
+    def _torrentsByType(self, listtype, page):
+        """Fetches and parses page `page` of this user's torrents.php listing of type `listtype`."""
+        if self.userid is None:
+            self.userid = self.getUserId()
+        url = "/torrents.php?type=%s&userid=%s&page=%d" % (listtype, self.userid, page)
+        torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
+        return self._parser().torrentsList(torrentspage)
+
+    def getTorrentsSeeding(self, page=1):
+        """
+        Returns a list with all user's seeding music torrents
+        in form of dictionary {page(tuple with current and total),tag, dlurl, id,
+        artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.},
+        album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.}
+        """
+        return self._torrentsByType("seeding", page)
+
+    def getTorrentsSnatched(self,page=1):
+        """
+        Returns a list with all user's snatched music torrents
+        in form of dictionary {page(tuple with current and total),tag, dlurl, id,
+        artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.},
+        album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.}
+        """
+        return self._torrentsByType("snatched", page)
+
+    def getTorrentsUploaded(self, page=1):
+        """
+        Returns a list with all user's uploaded music torrents
+        in form of dictionary {page(tuple with current and total),tag, dlurl, id,
+        artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.},
+        album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.}
+        """
+        return self._torrentsByType("uploaded", page)
+
+    def getTorrentsCommented(self, page=1):
+        """
+        Returns a list with all user's commented torrents
+        in form of dictionary {postid, torrentid, comment,postdate, pages}.
+        Returns None when the user's info (and so the comments url) is not available.
+        """
+        if self.userid is None:
+            self.userid = self.getUserId()
+
+        comments = self.specificUserInfo().torrentscomments
+        if not comments:
+            # no info retrieved: there is no url to follow
+            return None
+
+        url = "/%s&page=%d" % (comments[1], page)
+        torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
+        return self._parser().postsList(torrentspage)
+
+    ###############################################
+    #              specific values                #
+    ###############################################
+
+    def specificUserInfo(self):
+        """
+        Returns a SpecificInformation object with the user's info as flat
+        attributes. Every attribute is None when no user info could be retrieved
+        (e.g. user's paranoia is on).
+        """
+        info = SpecificInformation()
+        # Initialize attributes
+        for field in self._SPECIFIC_FIELDS:
+            setattr(info, field, None)
+
+        # self.getInfo() is only called when no info has been cached yet
+        if self.userinfo or self.getInfo() is not None:
+            stats = self.userinfo['stats']
+            percentile = self.userinfo['percentile']
+            community = self.userinfo['community']
+            ######## stats ###########
+            info.joindate = stats['joined']
+            info.lastseen = stats['lastseen']
+            info.dataup = stats['uploaded']
+            info.datadown = stats['downloaded']
+            info.ratio = stats['ratio']
+            info.rratio = stats['rratio']
+            ######## percentile ###########
+            info.uppercentile = percentile['dataup']
+            info.downpercentile = percentile['datadown']
+            info.torrentsuppercentile = percentile['torrentsup']
+            info.reqfilledpercentile = percentile['reqfilled']
+            info.bountyspentpercentile = percentile['bountyspent']
+            info.postsmadepercentile = percentile['postsmade']
+            info.artistsaddedpercentile = percentile['artistsadded']
+            info.overallpercentile = percentile['overall']
+            ######## community ###########
+            info.postsmadecom = community['forumposts']
+            # kept under both names: "torrentscomments" is what the original
+            # assigned and getTorrentsCommented() reads, "torrentscommentscom"
+            # follows the naming of the other community attributes
+            info.torrentscomments = community['torrentscomments']
+            info.torrentscommentscom = community['torrentscomments']
+            info.collagesstartedcom = community['startedcollages']
+            info.collagescontrcon = community['contributedcollages']
+            info.reqfilledcom = community['reqfilled']
+            info.reqvotedcom = community['reqvoted']
+            info.uploadedcom = community['uploaded']
+            info.uniquecom = community['uniquegroups']
+            # NOTE(review): key is spelled 'pefectflacs' here but 'perfectflacs'
+            # in getInfo()'s docstring -- confirm against Parser.userInfo
+            info.perfectcom = community['pefectflacs']
+            info.seedingcom = community['seeding']
+            info.leechingcom = community['leeching']
+            info.snatchedcom = community['snatched']
+            info.invitedcom = community['invited'][0]
+            info.artistsaddedcom = community['artists']
+
+        return info
+
+
+class Torrent(WhatBase):
+ """A What.CD torrent"""
+
+    def __init__(self, id, page, isparent, whatcd):
+        """Create a torrent object and load its info right away.
+        # Parameters:
+            * id str: The torrent's id.
+            * page: The torrent page's number
+            * isparent: true when `id` is to be used as a group ("id") rather
+              than a "torrentid" request parameter
+            * whatcd object: the WhatCD network object
+        """
+        WhatBase.__init__(self, whatcd)
+        self.whatcd = whatcd
+        self.id = id
+        self.page = page
+        self.isParent = isparent
+        self.torrentpage = "/torrents.php?"
+        # getInfo() needs every attribute above, so it has to come last
+        self.torrentinfo = self.getInfo()
+
+
+    def getTorrentUrl(self):
+        """
+        Returns the torrent's real URL.
+        For a parent (group) id this is the "/torrents.php?..." path built
+        locally. Otherwise it is the value of the 'location' header sent in
+        response to a "torrentid" request, or None when there is no such header.
+        """
+        if self.isParent:
+            form = {'id': self.id, 'page':self.page}
+            data = urllib.urlencode(form)
+            return self.torrentpage + data
+
+        form = {'torrentid': self.id, 'page':self.page}
+        data = urllib.urlencode(form)
+        headers = self._request("GET", self.torrentpage + data, "", self.whatcd.headers).execute(True).headers
+        # dict(headers) is never None; a missing header used to raise KeyError
+        return dict(headers).get('location')
+
+
+    def getInfo(self):
+        """
+        Returns a dictionary with the torrent's info, or None when no torrent
+        page could be retrieved for this id.
+        """
+        url = self.getTorrentUrl()
+        if not url:
+            # no redirect target: nothing to fetch
+            print("no torrent retrieved with such id")
+            return None
+
+        # getTorrentUrl() returns a path with a leading slash for groups and a
+        # header value otherwise; normalise to exactly one leading slash
+        path = "/" + url.lstrip("/")
+        torrentpage = BeautifulSoup(self._request("GET", path, "", self.whatcd.headers).execute(True).body)
+
+        title = torrentpage.find("title")
+        if title is None or 'Site log' in (title.string or ""):
+            print("no torrent retrieved with such id")
+            return None
+
+        return self._parser().torrentInfo(torrentpage, self.id, self.isParent)
+
+
+ def getTorrentParentId(self):
+ """
+ Returns torrent's group id
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['parentid']
+
+ def getTorrentDownloadURL(self):
+ """
+ Returns relative url to download the torrent
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['downloadurl']
+
+ def getTorrentDetails(self):
+ """
+ Returns torrent's details (format / bitrate)
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['details']
+
+ def getTorrentEditionInfo(self):
+ """
+ Returns torrent's edition info (Edition information / media type)
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['editioninfo']
+
+ def getTorrentMediaType(self):
+ """
+ Returns torrent's media type
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['rlsmedia']
+
+ def getTorrentSize(self):
+ """
+ Returns torrent's size
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['size']
+
+
+ def getTorrentSnatched(self):
+ """
+ Returns torrent's total snatches
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['snatched']
+
+
+ def getTorrentSeeders(self):
+ """
+ Returns torrent's current seeders
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['seeders']
+
+ def getTorrentLeechers(self):
+ """
+ Returns torrent's current leechers
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['leechers']
+
+ def getTorrentUploadedBy(self):
+ """
+ Returns torrent's uploader
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['uploadedby']
+
+ def getTorrentFolderName(self):
+ """
+ Returns torrent's folder name
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['foldername']
+
+ def getTorrentFileList(self):
+ """
+ Returns torrent's file list
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['filelist']
+
+
+ def getTorrentReleaseType(self):
+ """
+ Returns torrent's release type
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['rlstype']
+
+ def getTorrentDescription(self):
+ """
+ Returns torrent's description / empty string is there's none
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['torrentdescription']
+
+ def getTorrentComments(self):
+ """
+ Returns a list of dictionnaries with each comment in the torrent page
+ {postid,post,userid,username}
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['comments']
+
+ def getTorrentCommentsPagesNumber(self):
+ """
+ Returns number of pages of comments in the torrent
+ """
+ if self.torrentinfo:
+ return self.torrentInfo['torrent']['commentspages']
+
+ def isTorrentFreeLeech(self):
+ """
+ Returns True if torrent is freeleeech, False if not
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['isfreeleech']
+
+ def isTorrentReported(self):
+ """
+ Returns True if torrent is reported, False if not
+ """
+ if self.torrentinfo:
+ return self.torrentinfo['torrent']['isreported']
+
+
+class Artist(WhatBase):
+ """A What.CD artist"""
+
+ def __init__(self, name, whatcd):
+ """Create an artist object.
+ # Parameters:
+ * name str: The artist's name.
+ * whatcd object: The WhatCD network object
+ """
+ WhatBase.__init__(self, whatcd)
+ self.name = name
+ self.whatcd = whatcd
+ self.artistpage = "/artist.php"
+ self.utils = Utils()
+ self.info = self.getInfo()
+
+
+ def getArtistName(self):
+ """
+ Returns artist's name
+ """
+ return self.name
+
+ def getArtistId(self):
+ """
+ Returns artist's id, None if artist's not found
+ """
+ form = {'artistname': self.name}
+ data = urllib.urlencode(form)
+ headers = self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).headers
+ if dict(headers)['location'][0:14] != 'artist.php?id=':
+ return None
+ else:
+ return dict(headers)['location'][14:]
+
+ def getInfo(self):
+ """
+ Returns artist's info, None if there isn't
+ """
+ if self.getArtistId():
+ form = {'id': self.getArtistId()}
+ data = urllib.urlencode(form)
+ artistpage = BeautifulSoup(self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).body)
+ return self._parser().artistInfo(artistpage)
+ else:
+ print "no artist info retrieved"
+ return None
+
+ def getArtistReleases(self):
+ """
+ Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id}
+ """
+ return self.info['releases']
+
+ def getArtistImage(self):
+ """
+ Return the artist image URL, None if there's no image
+ """
+ return self.info['image']
+
+ def getArtistInfo(self):
+ """
+ Return the artist's info, blank string if none
+ """
+ return self.info['info']
+
+ def getArtistTags(self):
+ """
+ Return a list with artist's tags
+ """
+ return self.info['tags']
+
+ def getArtistSimilar(self):
+ """
+ Return a list with artist's similar artists
+ """
+ return self.info['similarartists']
+
+ def getArtistRequests(self):
+ """
+ Returns a list with all artist's requests in form of dictionary {requestname, id}
+ """
+ return self.info['requests']
+
+ def setArtistInfo(self, id, info):
+ """
+ Updates what.cd artist's info and image
+ Returns 1 if artist info updated succesfully, 0 if not.
+ # Parameters:
+ * id str: what.cd artist's id
+ * info tuple: (The artist's info -str-, image url -str- (None if there isn't))
+ """
+ if info[0]:
+ params = {'action': 'edit','artistid':id}
+ data = urllib.urlencode(params)
+
+ edit_page = BeautifulSoup(self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).body)
+ what_form = self._parser().whatForm(edit_page,'edit')
+ if info[1]:
+ image_to_post = info[1]
+ else:
+ image_to_post = what_form['image']
+ data_to_post = {'body': info[0].encode('utf-8'),
+ 'summary':'automated artist info insertion',\
+ 'image':image_to_post,\
+ 'artistid':what_form['artistid'],\
+ 'auth':what_form['auth'],\
+ 'action':what_form['action']}
+
+ #post artist's info
+ self.whatcd.headers['Content-type']="application/x-www-form-urlencoded"
+ response = self._request("POST", self.artistpage, urllib.urlencode(data_to_post), self.whatcd.headers).execute(False)
+ artist_id_returned = dict(response.headers)['location'][14:]
+
+ if str(artist_id_returned) == str(what_form['artistid']) :
+ return 1
+ else:
+ return 0
+
+ else:
+ return 'no artist info provided. Aborting.'
+ exit()
+
+
+class Parser(object):
+
+ def __init__(self,whatcd):
+ self.utils = Utils()
+ self.whatcd = whatcd
+ self.totalpages = 0
+
+ def authenticatedUserInfo(self, dom):
+ """
+ Parse the index page and returns a dictionnary with basic authenticated user information
+ """
+ userInfo = {}
+ soup = BeautifulSoup(str(dom))
+ for ul in soup.fetch('ul'):
+ if ul["id"] == "userinfo_username":
+ #retrieve user logged id
+ hrefid = ul.findAll('li')[0].find("a")["href"]
+ regid = re.compile('[0-9]+')
+ if regid.search(hrefid) is None:
+ self.debugMessage("not found href to retrieve user id")
+ else:
+ userInfo["id"] = regid.search(hrefid).group(0)
+
+ #retrieve user logged id
+ hrefauth = ul.findAll('li')[2].find("a")["href"]
+ regauth = re.compile('=[0-9a-fA-F]+')
+ if regid.search(hrefid) is None:
+ self.debugMessage("not found href to retrieve user id")
+ else:
+ userInfo["authcode"] = regauth.search(hrefauth).group(0)[1:]
+
+ elif ul["id"] == "userinfo_stats":
+ if len(ul.findAll('li')) > 0:
+ userInfo["uploaded"] = ul.findAll('li')[0].find("span").string
+ userInfo["downloaded"] = ul.findAll('li')[1].find("span").string
+ userInfo["ratio"] = ul.findAll('li')[2].findAll("span")[1].string
+ userInfo["required"] = ul.findAll('li')[3].find("span").string
+ userInfo["authenticate"] = True
+
+ return userInfo
+
+ def userInfo(self, dom, user):
+ """
+ Parse an user's page and returns a dictionnary with its information
+
+ # Parameters:
+ * dom str: user page html
+ * user str: what.cd username
+ """
+ userInfo = {'stats':{}, 'percentile':{}, 'community':{}}
+ soup = BeautifulSoup(str(dom))
+
+ for div in soup.fetch('div',{'class':'box'}):
+
+ #if paronoia is not set to 'Off', stop collecting data
+ if div.findAll('div')[0].string == "Personal":
+ if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off":
+ return None
+
+ statscontainer = soup.findAll('div', {'class':'box'})[1]
+ percentilecontainer = soup.findAll('div', {'class':'box'})[2]
+ communitycontainer = soup.findAll('div', {'class':'box'})[4]
+
+
+ userInfo['stats']['joined'] = statscontainer.findAll('li')[0].find('span')['title']
+ userInfo['stats']['lastseen'] = statscontainer.findAll('li')[1].find('span')['title']
+ userInfo['stats']['uploaded'] = statscontainer.findAll('li')[2].string[10:]
+ userInfo['stats']['downloaded'] = statscontainer.findAll('li')[3].string[12:]
+ userInfo['stats']['ratio'] = statscontainer.findAll('li')[4].find('span').string
+ userInfo['stats']['rratio'] = statscontainer.findAll('li')[5].string[16:]
+ userInfo['percentile']['dataup'] = percentilecontainer.findAll('li')[0].string[15:]
+ userInfo['percentile']['datadown'] = percentilecontainer.findAll('li')[1].string[17:]
+ userInfo['percentile']['torrentsup'] = percentilecontainer.findAll('li')[2].string[19:]
+ userInfo['percentile']['reqfilled'] = percentilecontainer.findAll('li')[3].string[17:]
+ userInfo['percentile']['bountyspent'] = percentilecontainer.findAll('li')[4].string[14:]
+ userInfo['percentile']['postsmade'] = percentilecontainer.findAll('li')[5].string[12:]
+ userInfo['percentile']['artistsadded'] = percentilecontainer.findAll('li')[6].string[15:]
+ userInfo['percentile']['overall'] = percentilecontainer.findAll('li')[7].find('strong').string[14:]
+
+ userInfo['community']['forumposts'] = (communitycontainer.findAll('li')[0].contents[0].string[13:len(communitycontainer.findAll('li')[0].contents[0].string)-2],\
+ communitycontainer.findAll('li')[0].find('a')['href'])
+ userInfo['community']['torrentscomments'] = (communitycontainer.findAll('li')[1].contents[0].string[18:len(communitycontainer.findAll('li')[1].contents[0].string)-2],\
+ communitycontainer.findAll('li')[1].find('a')['href'])
+ userInfo['community']['startedcollages'] = (communitycontainer.findAll('li')[2].contents[0].string[18:len(communitycontainer.findAll('li')[2].contents[0].string)-2],\
+ communitycontainer.findAll('li')[2].find('a')['href'])
+ userInfo['community']['contributedcollages'] = (communitycontainer.findAll('li')[3].contents[0].string[25:len(communitycontainer.findAll('li')[3].contents[0].string)-2],\
+ communitycontainer.findAll('li')[3].find('a')['href'])
+ userInfo['community']['reqfilled'] = (communitycontainer.findAll('li')[4].contents[0].string[17:len(communitycontainer.findAll('li')[4].contents[0].string)-2],\
+ communitycontainer.findAll('li')[4].find('a')['href'])
+ userInfo['community']['reqvoted'] = (communitycontainer.findAll('li')[5].contents[0].string[16:len(communitycontainer.findAll('li')[5].contents[0].string)-2],\
+ communitycontainer.findAll('li')[5].find('a')['href'])
+ userInfo['community']['uploaded'] = (communitycontainer.findAll('li')[6].contents[0].string[10:len(communitycontainer.findAll('li')[6].contents[0].string)-2],\
+ communitycontainer.findAll('li')[6].find('a')['href'])
+ userInfo['community']['uniquegroups'] = (communitycontainer.findAll('li')[7].contents[0].string[15:len(communitycontainer.findAll('li')[7].contents[0].string)-2],\
+ communitycontainer.findAll('li')[7].find('a')['href'])
+ userInfo['community']['pefectflacs'] = (communitycontainer.findAll('li')[8].contents[0].string[16:len(communitycontainer.findAll('li')[8].contents[0].string)-2],\
+ communitycontainer.findAll('li')[8].find('a')['href'])
+ userInfo['community']['seeding'] = (communitycontainer.findAll('li')[9].contents[0].string[9:len(communitycontainer.findAll('li')[9].contents[0].string)-2],\
+ communitycontainer.findAll('li')[9].find('a')['href'])
+ userInfo['community']['leeching'] = (communitycontainer.findAll('li')[10].contents[0].string[10:len(communitycontainer.findAll('li')[10].contents[0].string)-2],\
+ communitycontainer.findAll('li')[10].find('a')['href'])
+ #NB: there's a carriage return and white spaces inside the snatched li tag
+ userInfo['community']['snatched'] = (communitycontainer.findAll('li')[11].contents[0].string[10:len(communitycontainer.findAll('li')[11].contents[0].string)-7],\
+ communitycontainer.findAll('li')[11].find('a')['href'])
+ userInfo['community']['invited'] = (communitycontainer.findAll('li')[12].contents[0].string[9:],\
+ None)
+ userInfo['community']['artists'] = percentilecontainer.findAll('li')[6]['title']
+
+ return userInfo
+
+ def torrentInfo(self, dom, id, isparent):
+ """
+ Parse a torrent's page and returns a dictionnary with its information
+ """
+
+ torrentInfo = {'torrent':{}}
+ torrentfiles = []
+ torrentdescription = ""
+ isreported = False
+ isfreeleech = False
+ soup = BeautifulSoup(str(dom))
+ if isparent:
+ torrentInfo['torrent']['parentid'] = id
+ else:
+ groupidurl = soup.findAll('div', {'class':'linkbox'})[0].find('a')['href']
+ torrentInfo['torrent']['editioninfo'] = soup.findAll('td', {'class':'edition_info'})[0].find('strong').contents[-1]
+ regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray')
+ torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0)
+ torrentInfo['torrent']['parentid'] = groupidurl[groupidurl.rfind("=")+1:]
+ torrentInfo['torrent']['downloadurl'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a',{'title':'Download'})[0]['href']
+ ## is freeleech or/and reported? ##
+ #both
+ if len(soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents) == 4:
+ isreported = True
+ isfreeleech = True
+ torrentInfo['torrent']['details'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[0]
+ #either
+ elif len(soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents) == 2:
+ if soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[1].string == 'Reported':
+ isreported = True
+ elif soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[1].string == 'Freeleech!':
+ isreported = True
+ torrentInfo['torrent']['details'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[0]
+ #none
+ else:
+ torrentInfo['torrent']['details'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('a')[-1].contents[0]
+ torrentInfo['torrent']['isfreeleech'] = isfreeleech
+ torrentInfo['torrent']['isreported'] = isreported
+ torrentInfo['torrent']['size'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[1].string
+ torrentInfo['torrent']['snatched'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[2].string
+ torrentInfo['torrent']['seeders'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[3].string
+ torrentInfo['torrent']['leechers'] = soup.findAll('tr',{'id':'torrent%s'%id})[0].findAll('td')[4].string
+ torrentInfo['torrent']['uploadedby'] = soup.findAll('tr',{'id':'torrent_%s'%id})[0].findAll('a')[0].string
+ foldername = soup.findAll('div',{'id':'files_%s'%id})[0].findAll('div')[1].string
+ if(foldername is None):
+ torrentInfo['torrent']['foldername'] = None
+ else:
+ torrentInfo['torrent']['foldername'] = self.utils.decodeHTMLEntities(foldername)
+ files = soup.findAll('div',{'id':'files_%s'%id})[0].findAll('tr')
+ for file in files[1:-1]:
+ torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string))
+ torrentInfo['torrent']['filelist'] = torrentfiles
+ #is there any description?
+ if len(soup.findAll('tr',{'id':'torrent_%s'%id})[0].findAll('blockquote')) > 1:
+ description = torrentInfo['torrent']['description'] = soup.findAll('tr',{'id':'torrent_%s'%id})[0].findAll('blockquote')[1].contents
+ info = ''
+ for content in description:
+ if content.string:
+ info = "%s%s" % (info, self.utils._string(content.string))
+ torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string))
+ torrentInfo['torrent']['torrentdescription'] = torrentdescription
+ regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown')
+ torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0)
+
+ torrentInfo['torrent']['comments'] = []
+ torrentInfo['torrent']['commentspages'] = 0
+
+ if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0:
+ linkbox = dom.findAll("div", {"class": "linkbox"})[-1]
+ pages = 1
+ postid = ''
+ userid = ''
+ post = ''
+ # if there's more than 1 page of torrents
+ if linkbox.find("a"):
+ # by default torrent page show last page of comments
+ lastpage = linkbox.findAll("a")[-1]['href']
+ pages = int(lastpage[18:lastpage.find('&')]) +1
+ for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}):
+ postid = comment.find("a",{"class":"post_id"}).string[1:]
+ userid = comment.findAll("a")[1]['href'][12:]
+ username = comment.findAll("a")[1].string
+ post = comment.find("div", {"id":"content"+postid})
+ post = u''.join([post.string for post in post.findAll(text=True)])
+ torrentInfo['torrent']['comments'].append({"postid":postid,"post":post,"userid":userid,"username":username})
+
+ torrentInfo['torrent']['commentspages'] = pages
+
+ return torrentInfo
+
+ def artistInfo(self, dom):
+ """
+ Parse an artist's page and returns a dictionnary with its information
+ """
+ artistInfo = {}
+ releases = []
+ requests = []
+ infoartist = ""
+ tagsartist = []
+ similarartists = []
+ soup = BeautifulSoup(str(dom))
+ soupfetch = soup.fetch('table',{'class':'torrent_table'})
+ if not soupfetch:
+ soupfetch = soup.fetch('table',{'class':'torrent_table grouped release_table'})
+ for releasetype in soupfetch:
+ releasetypenames = releasetype.findAll('strong')
+ releasetypename = releasetype.findAll('strong')[0].string
+ for release in releasetypenames[1:-1]:
+ #skip release edition info and Freeleech! s
+ if len(release.parent.contents) > 1 and len(release.contents) > 1 :
+ releaseyear = release.contents[0][0:4]
+ releasename = release.contents[1].string
+ releasehref = release.contents[1]['href']
+ releaseid = releasehref[releasehref.rfind('=')+1:]
+ releases.append({'releasetype':releasetypename,\
+ 'year': releaseyear,'name':self.utils.decodeHTMLEntities(releasename),'id':releaseid})
+
+ artistInfo['releases'] = releases
+ #is there an artist image?
+ artistInfo['image'] = None
+ if soup.find('div', {'class':'box'}).find('img'):
+ artistInfo['image'] = soup.find('div', {'class':'box'}).find('img')['src']
+ #is there any artist info?
+ contents = soup.find('div', {'class':'body'}).contents
+ if len(contents) > 0:
+ for content in contents:
+ if content.string:
+ infoartist = "%s%s" % (infoartist, self.utils._string(content.string))
+ artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist)
+ #is there any artist tags?
+ if soup.findAll('ul',{'class':'stats nobullet'})[0].findAll('li'):
+ ul = soup.findAll('ul',{'class':'stats nobullet'})[0].findAll('li')
+ for li in ul:
+ if li.contents[0].string:
+ tagsartist.append(self.utils._string(li.contents[0].string))
+ artistInfo['tags'] = tagsartist
+ #is there any similar artist?
+ if soup.findAll('ul',{'class':'stats nobullet'})[2].findAll('span',{'title':'2'}):
+ artists = soup.findAll('ul',{'class':'stats nobullet'})[2].findAll('span',{'title':'2'})
+ for artist in artists:
+ if artist.contents[0].string:
+ similarartists.append(self.utils._string(artist.contents[0].string))
+ artistInfo['similarartists'] = similarartists
+ #is there any request?
+ if soup.find('table',{'id':'requests'}):
+ for request in soup.find('table',{'id':'requests'}).findAll('tr',{'class':re.compile('row')}):
+ requests.append({'requestname':request.findAll('a')[1].string,'id':request.findAll('a')[1]['href'][28:]})
+
+ artistInfo['requests'] = requests
+
+ return artistInfo
+
+ def torrentsList(self,dom):
+ """
+ Parse a torrent's list page and returns a dictionnary with its information
+ """
+ torrentslist = []
+ torrentssoup = dom.find("table", {"width": "100%"})
+ pages = 0
+
+ #if there's at least 1 torrent in the list
+ if torrentssoup:
+ navsoup = dom.find("div", {"class": "linkbox"})
+ pages = 1
+ regyear = re.compile('\[\d{4}\]')
+
+ #is there a page navigation bar?
+ if navsoup.contents:
+ #if there's more than 1 page of torrents
+ if navsoup.contents[-1].has_key('href'):
+ lastpage = navsoup.contents[-1]['href']
+ pages = lastpage[18:lastpage.find('&')]
+ self.totalpages = pages
+ else: #we are at the last page, no href
+ pages = self.totalpages+1
+ #fetch all tr except first one (column head)
+ for torrent in torrentssoup.fetch('tr')[1:]:
+ #exclude non music torrents
+ if torrent.find('td').find('div')['class'][0:10] == 'cats_music':
+
+ torrenttag = torrent.find('td').contents[1]['title']
+ torrentdl = torrent.findAll('td')[1].find('span').findAll('a')[0]['href']
+ torrentrm = torrent.findAll('td')[1].find('span').findAll('a')[1]['href']
+ torrentid = torrentrm[torrentrm.rfind('=')+1:]
+ torrenttd = torrent.findAll('td')[1]
+
+ # remove dataless elements
+ torrenttags = torrenttd.div
+ rightlinks = torrenttd.span
+ torrenttags.extract()
+ rightlinks.extract()
+
+ # remove line breaks
+ torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")])
+ torrenttd = BeautifulSoup(torrenttd)
+ isScene = False
+ info = ""
+
+ if len(torrenttd.findAll('a')) == 2:
+ #one artist
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.find("a").string),)
+ artistid = (torrenttd.find("a")['href'][14:],)
+ torrentalbum = torrenttd.findAll("a")[1].string
+ info = torrenttd.findAll("a")[1].nextSibling.string.strip()
+
+
+ elif len(torrenttd.findAll('a')) == 1:
+ #various artists
+ torrentartist = ('Various Artists',)
+ artistid = ()
+ torrentalbum = torrenttd.find("a").string
+ info = torrenttd.find("a").nextSibling.string.strip()
+
+ elif len(torrenttd.findAll('a')) == 3:
+ #two artists
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[0].string), \
+ self.utils.decodeHTMLEntities(torrenttd.findAll("a")[1].string))
+ artistid = (torrenttd.findAll("a")[0]['href'][14:],\
+ torrenttd.findAll("a")[1]['href'][14:])
+ torrentalbum = torrenttd.findAll("a")[2].string
+ info = torrenttd.findAll("a")[2].nextSibling.string.strip()
+
+ elif torrenttd.find(text=re.compile('performed by')):
+ #performed by
+ torrentartist = (self.utils.decodeHTMLEntities(torrenttd.findAll("a")[-2].string),)
+ artistid = (torrenttd.findAll("a")[-2]['href'][14:],)
+ torrentalbum = torrenttd.findAll("a")[-1].string
+ info = torrenttd.findAll("a")[-1].nextSibling.string.strip()
+
+ if 'Scene' in info:
+ isScene = True
+
+ torrentyear = regyear.search(info).group(0)[1:5]
+ torrentslist.append({'tag':torrenttag,\
+ 'dlurl':torrentdl,\
+ 'id':torrentid, \
+ 'artist':torrentartist,\
+ 'artistid':artistid,\
+ 'album':self.utils.decodeHTMLEntities(torrentalbum),
+ 'year':torrentyear,
+ 'pages':pages,
+ 'scene':isScene})
+
+ return torrentslist
+
+ def postsList(self,dom):
+ """
+ Parse a post list page and returns a dictionnary with each post information:
+ {torrentid, commentid, postid}
+ """
+ postslist = []
+ postssoup = dom.find("div", {"class": "thin"})
+ pages = 0
+
+ #if there's at least 1 post in the list
+ if postssoup:
+ navsoup = dom.find("div", {"class": "linkbox"})
+
+ #if there's more than 1 page of torrents
+ if navsoup.find("a"):
+ lastpage = navsoup.findAll("a")[1]['href']
+ pages = lastpage[18:lastpage.find('&')]
+ self.totalpages = pages
+ else: #we are at the last page, no link
+ pages = 1
+
+ for post in postssoup.fetch('table', {'class':'forum_post box vertical_margin'}):
+ commentbody = post.find("td", {"class":"body"})
+ postid = post.find("span").findAll("a")[0].string[1:]
+ torrentid = post.find("span").findAll("a")[-1]['href'][post.find("span").findAll("a")[-1]['href'].rfind('=')+1:]
+ comment = u''.join([commentbody.string for commentbody in commentbody.findAll(text=True)])
+ postdate = post.find("span", {"class":"time"})['title']
+ postslist.append({'postid':postid,\
+ 'torrentid':torrentid,\
+ 'comment':comment,\
+ 'postdate':postdate,\
+ 'pages':pages})
+
+
+ return postslist
+
+
+ def whatForm(self, dom, action):
+ """
+ Parse a what.cd edit page and returns a dict with all form inputs/textareas names and values
+ # Parameters:
+ * dom str: the edit page dom.
+ + action str: the action value from the requested form
+ """
+ inputs = {}
+
+ form = dom.find('input',{'name':'action','value':action}).parent
+ elements = form.fetch(('input','textarea'))
+ #get all form elements except for submit input
+ for element in elements[0:-1]:
+ name = element.get('name',None)
+ if element.name == 'textarea':
+ inputs[name] = element.string
+ else:
+ inputs[name] = element.get('value',None)
+ return inputs
+
+
+
+if __name__ == "__main__":
+ print "Module to manage what.cd as a web service"