From cb22e917cdf93789cf3cb1111a801a07f6e72411 Mon Sep 17 00:00:00 2001
From: Aaron Cohen <cohena@gmail.com>
Date: Tue, 11 Sep 2012 00:13:12 -0700
Subject: [PATCH] What.cd WIP, now using api rather than RSS

---
 headphones/__init__.py |   14 +-
 headphones/searcher.py |   94 +--
 headphones/webserve.py |   20 +-
 lib/whatapi.py         | 1442 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1507 insertions(+), 63 deletions(-)
 create mode 100755 lib/whatapi.py

diff --git a/headphones/__init__.py b/headphones/__init__.py
index 761e57db..bf9738b9 100644
--- a/headphones/__init__.py
+++ b/headphones/__init__.py
@@ -155,8 +155,8 @@ WAFFLES = None
 WAFFLES_UID = None
 WAFFLES_PASSKEY = None
 WHATCD = None
-WHATCD_UID = None
-WHATCD_PASSKEY = None
+WHATCD_USERNAME = None
+WHATCD_PASSWORD = None
 DOWNLOAD_TORRENT_DIR = None
 
 INTERFACE = None
@@ -251,7 +251,7 @@ def initialize():
                 LOSSLESS_DESTINATION_DIR, PREFERRED_QUALITY, PREFERRED_BITRATE, DETECT_BITRATE, ADD_ARTISTS, CORRECT_METADATA, MOVE_FILES, \
                 RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, \
                 ADD_ALBUM_ART, EMBED_ALBUM_ART, EMBED_LYRICS, DOWNLOAD_DIR, BLACKHOLE, BLACKHOLE_DIR, USENET_RETENTION, SEARCH_INTERVAL, \
-                TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, WHATCD, WHATCD_UID, WHATCD_PASSKEY, DOWNLOAD_TORRENT_DIR, \
+                TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, WHATCD, WHATCD_USERNAME, WHATCD_PASSWORD, DOWNLOAD_TORRENT_DIR, \
                 LIBRARYSCAN_INTERVAL, DOWNLOAD_SCAN_INTERVAL, SAB_HOST, SAB_USERNAME, SAB_PASSWORD, SAB_APIKEY, SAB_CATEGORY, \
                 NZBMATRIX, NZBMATRIX_USERNAME, NZBMATRIX_APIKEY, NEWZNAB, NEWZNAB_HOST, NEWZNAB_APIKEY, NEWZNAB_ENABLED, EXTRA_NEWZNABS,\
                 NZBSORG, NZBSORG_UID, NZBSORG_HASH, NEWZBIN, NEWZBIN_UID, NEWZBIN_PASSWORD, LASTFM_USERNAME, INTERFACE, FOLDER_PERMISSIONS, \
@@ -348,8 +348,8 @@ def initialize():
         WAFFLES_PASSKEY = check_setting_str(CFG, 'Waffles', 'waffles_passkey', '')
 
         WHATCD = bool(check_setting_int(CFG, 'What.cd', 'whatcd', 0))
-        WHATCD_UID = check_setting_str(CFG, 'What.cd', 'whatcd_uid', '')
-        WHATCD_PASSKEY = check_setting_str(CFG, 'What.cd', 'whatcd_passkey', '')
+        WHATCD_USERNAME = check_setting_str(CFG, 'What.cd', 'whatcd_username', '')
+        WHATCD_PASSWORD = check_setting_str(CFG, 'What.cd', 'whatcd_password', '')
 
         SAB_HOST = check_setting_str(CFG, 'SABnzbd', 'sab_host', '')
         SAB_USERNAME = check_setting_str(CFG, 'SABnzbd', 'sab_username', '')
@@ -631,8 +631,8 @@ def config_write():
 
     new_config['What.cd'] = {}
     new_config['What.cd']['whatcd'] = int(WHATCD)
-    new_config['What.cd']['whatcd_uid'] = WHATCD_UID
-    new_config['What.cd']['whatcd_passkey'] = WHATCD_PASSKEY
+    new_config['What.cd']['whatcd_username'] = WHATCD_USERNAME
+    new_config['What.cd']['whatcd_password'] = WHATCD_PASSWORD
 
     new_config['General']['search_interval'] = SEARCH_INTERVAL
     new_config['General']['libraryscan_interval'] = LIBRARYSCAN_INTERVAL
diff --git a/headphones/searcher.py b/headphones/searcher.py
index ec5c86ea..d54d6d76 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -15,6 +15,7 @@
 
 import urllib, urllib2, urlparse
 import lib.feedparser as feedparser
+import lib.whatapi as whatapi
 from xml.dom import minidom
 from xml.parsers.expat import ExpatError
 from StringIO import StringIO
@@ -808,69 +809,70 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
 
         if headphones.WHATCD:
             provider = "What.cd"
-            providerurl = url_fix("https://www.what.cd/browse.php")
 
             bitrate = None
             if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
-                format = "FLAC"
-                bitrate = "(Lossless)"
+                format_regex = "FLAC"
                 maxsize = 10000000000
             elif headphones.PREFERRED_QUALITY:
-                format = "FLAC OR MP3"
+                format_regex = "(FLAC|MP3)"
                 maxsize = 10000000000
             else:
-                format = "MP3"
+                format_regex = "MP3"
                 maxsize = 300000000
 
-            query_items = ['artist:"%s"' % artistterm,
-                           'album:"%s"'   % albumterm,
-                           'format:(%s)' % format,
-                           'size:[0 TO %d]' % maxsize,
-                           '-seeders:0'] # cut out dead torrents
-            if bitrate:
-                query_items.append('bitrate:"%s"' % bitrate)
-
-            params = {
-                "uid": headphones.WHATCD_UID,
-                "passkey": headphones.WHATCD_PASSKEY,
-                "rss": "1",
-                "c0": "1",
-                "s": "seeders", # sort by
-                "d": "desc" # direction
-            }
-
-            searchURL = "%s?%s&q=%s" % (providerurl, urllib.urlencode(params), urllib.quote(" ".join(query_items)))
-
             try:
-                data = urllib2.urlopen(searchURL, timeout=20).read()
-            except urllib2.URLError, e:
-                logger.warn('Error fetching data from %s: %s' % (provider, e))
-                data = False
+                whatcd = whatapi.getWhatcdNetwork(headphones.WHATCD_USERNAME, headphones.WHATCD_PASSWORD)
+            except:
+                whatcd = None
+                logger.warn("What.cd credentials incorrect or site is down.")
 
-            if data:
+            if whatcd:
+                whatcd.enableCaching()
 
-                d = feedparser.parse(data)
-                if not len(d.entries):
-                    logger.info(u"No results found from %s for %s" % (provider, term))
-                    pass
+                artist = whatcd.getArtist(artistterm)
+                artist_id = artist.getArtistId()
+            else:
+                artist_id = None
 
-                else:
-                    for item in d.entries:
-                        try:
-                            title_match = re.search(r"(.+)\[(.+)\]$", item.title)
-                            title = title_match.group(1).strip()
-                            details = title_match.group(2).split("-")
+            if artist_id: # will be None if artist not found
+                logger.info(u"What.cd artist ID: %s" % artist_id)
+                artist_releases = artist.getArtistReleases()
+                logger.info(u"Found %d releases on what.cd for %s" % (len(artist_releases), artistterm))
+                #Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id}
+            else:
+                artist_releases = []
 
-                            desc_match = re.search(r"Size: (\d+)<", item.description)
-                            size = desc_match.group(1)
+            possible_matches = [ release for release in artist_releases if albumterm in release['name'] ]
 
-                            url = item.link
+            # cap at 10 matches, 1 per second to reduce hits on API...don't wanna get in trouble.
+            # Might want to turn up number of matches later.
+            max_torrent_info_reads = 10
+            info_read_rate = 1
 
-                            resultlist.append((title, size, url, provider))
-                            logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
-                        except Exception, e:
-                            logger.error(u"An error occurred while trying to parse the response from What.cd: %s" % e)
+            match_torrents = []
+            for i, release in enumerate(possible_matches[:max_torrent_info_reads]):
+                if i > 0:
+                    time.sleep(info_read_rate)
+                match_torrents.append(whatcd.getTorrent(release['id']))
 
+            # filter on format, size, and num seeders
+            match_torrents = [ torrent for torrent in match_torrents
+                               if re.search(format_regex, torrent.getTorrentDetails(), flags=re.I)
+                                and torrent.getTorrentSize() <= maxsize
+                                and torrent.getTorrentSeeders() >= minimumseeders ]
+
+            # report an empty result, otherwise order the surviving torrents by seeder count
+            if not match_torrents:
+                logger.info(u"No results found from %s for %s after filtering" % (provider, term))
+            elif len(match_torrents) > 1:
+                match_torrents.sort(key=whatapi.Torrent.getTorrentSeeders)
+
+            for torrent in match_torrents:
+                resultlist.append((torrent.getTorrentFolderName(),
+                                   torrent.getTorrentSize(),
+                                   torrent.getTorrentDownloadURL(),
+                                   provider))
 
         if headphones.ISOHUNT:
             provider = "isoHunt"    
diff --git a/headphones/webserve.py b/headphones/webserve.py
index 5a5cc31f..6e225166 100644
--- a/headphones/webserve.py
+++ b/headphones/webserve.py
@@ -541,21 +541,21 @@ class WebInterface(object):
         
         return serve_template(templatename="config.html", title="Settings", config=config)  
     config.exposed = True
-    
-    
+
+
     def configUpdate(self, http_host='0.0.0.0', http_username=None, http_port=8181, http_password=None, launch_browser=0, api_enabled=0, api_key=None, 
         download_scan_interval=None, nzb_search_interval=None, libraryscan_interval=None, sab_host=None, sab_username=None, sab_apikey=None, sab_password=None, 
         sab_category=None, download_dir=None, blackhole=0, blackhole_dir=None, usenet_retention=None, nzbmatrix=0, nzbmatrix_username=None, nzbmatrix_apikey=None, 
         newznab=0, newznab_host=None, newznab_apikey=None, newznab_enabled=0, nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, newzbin=0, newzbin_uid=None, 
         newzbin_password=None, preferred_quality=0, preferred_bitrate=None, detect_bitrate=0, move_files=0, torrentblackhole_dir=None, download_torrent_dir=None, 
-        numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, rename_files=0, correct_metadata=0, 
-        cleanup_files=0, add_album_art=0, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None, folder_format=None, file_format=None, 
-        include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, remix=0, spokenword=0, audiobook=0, autowant_upcoming=False, autowant_all=False, 
-        interface=None, log_dir=None, music_encoder=0, encoder=None, bitrate=None, samplingfrequency=None, encoderfolder=None, advancedencoder=None, 
-        encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0, delete_lossless_files=0, prowl_enabled=0, prowl_onsnatch=0, 
-        prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None, xbmc_update=0, xbmc_notify=0, nma_enabled=False, 
-        nma_apikey=None, nma_priority=0, nma_onsnatch=0, synoindex_enabled=False, mirror=None, customhost=None, customport=None, customsleep=None, hpuser=None, hppass=None, 
-        preferred_bitrate_high_buffer=None, preferred_bitrate_low_buffer=None, **kwargs):
+        numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, whatcd=0, whatcd_uid=None, whatcd_passkey=None,
+        rename_files=0, correct_metadata=0, cleanup_files=0, add_album_art=0, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None,
+        folder_format=None, file_format=None, include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, remix=0, spokenword=0, audiobook=0,
+        autowant_upcoming=False, autowant_all=False, interface=None, log_dir=None, music_encoder=0, encoder=None, bitrate=None, samplingfrequency=None,
+        encoderfolder=None, advancedencoder=None, encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0, delete_lossless_files=0,
+        prowl_enabled=0, prowl_onsnatch=0, prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None, xbmc_update=0,
+        xbmc_notify=0, nma_enabled=False, nma_apikey=None, nma_priority=0, nma_onsnatch=0, synoindex_enabled=False, mirror=None, customhost=None, customport=None,
+        customsleep=None, hpuser=None, hppass=None, preferred_bitrate_high_buffer=None, preferred_bitrate_low_buffer=None, **kwargs):
 
         headphones.HTTP_HOST = http_host
         headphones.HTTP_PORT = http_port
diff --git a/lib/whatapi.py b/lib/whatapi.py
new file mode 100755
index 00000000..bc6f4394
--- /dev/null
+++ b/lib/whatapi.py
@@ -0,0 +1,1442 @@
+# -*- coding: utf_8 -*-
+#################################################################################
+#
+# Name: whatapi.py
+#
+# Synopsis: Module to manage what.cd as a web service
+#
+# Description: See below list of the implemented webservices
+#
+# Copyright 2010 devilcius
+#
+#                          The Wide Open License (WOL)
+#
+# Permission to use, copy, modify, distribute and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice and this license appear in all source copies.
+# THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
+# ANY KIND. See http://www.dspguru.com/wide-open-license for more information.
+#
+#################################################################################
+
+
+__author__="devilcius"
+__date__ ="$Oct 23, 2010 11:21:12 PM$"
+
+
+import hashlib
+try:
+    from BeautifulSoup import BeautifulSoup
+except:
+    raise ImportError,"Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download"
+import httplib
+import os
+import pickle
+import re
+import urllib
+import shelve
+import tempfile
+from htmlentitydefs import name2codepoint as n2cp
+
+
+"""
+A list of the implemented webservices (from what.cd )
+=====================================
+
+# User
+
+    * user.getUserId
+    * user.getInfo
+
+    * user.getTorrentsSeeding
+    * user.getTorrentsSnatched
+    * user.getTorrentsUploaded
+    * user.getTorrentsCommented
+
+    * user.specificUserInfo
+        Attributes:
+        ######## stats ###########
+        -joindate
+        -lastseen
+        -dataup
+        -datadown
+        -ratio
+        -rratio
+        ######## percentile ###########
+        -uppercentile
+        -downpercentile
+        -torrentsuppercentile
+        -reqfilledpercentile
+        -bountyspentpercentile
+        -postsmadepercentile
+        -artistsaddedpercentile
+        -overallpercentile
+        ######## community ###########
+        -postsmade
+        -torrentscomments
+        -collagesstarted
+        -collagescontr
+        -reqfilled
+        -reqvoted
+        -uploaded
+        -unique
+        -perfect
+        -seeding
+        -leeching
+        -snatched
+        -invited
+        -artistsadded
+
+
+# Artist
+
+    * artist.getArtistReleases
+    * artist.getArtistImage
+    * artist.getArtistInfo
+    * artist.getArtistTags
+    * artist.getArtistSimilar
+    * artist.getArtistRequests
+
+    + artist.setArtistInfo
+
+
+# Torrent
+
+    * torrent.getTorrentParentId
+    * torrent.getTorrentDownloadURL
+    * torrent.getTorrentDetails
+    * torrent.getTorrentSize
+    * torrent.getTorrentSnatched
+    * torrent.getTorrentSeeders
+    * torrent.getTorrentLeechers
+    * torrent.getTorrentUploadedBy
+    * torrent.getTorrentFolderName
+    * torrent.getTorrentFileList
+    * torrent.getTorrentDescription
+    * torrent.getTorrentComments
+    * torrent.isTorrentFreeLeech
+    * torrent.isTorrentReported
+
+
+# Authenticate
+
+    * authenticate.getAuthenticatedUserId
+    * authenticate.getAuthenticatedUserAuthCode
+    * authenticate.getAuthenticatedUserDownload
+    * authenticate.getAuthenticatedUserUpload()
+    * authenticate.getAuthenticatedUserRatio
+    * authenticate.getAuthenticatedUserRequiredRatio
+
+"""
+
+class ResponseBody:
+    """A Response Body Object"""
+    pass
+
+class SpecificInformation:
+    """A Specific Information Object"""
+    pass
+
+
+class WhatBase(object):
+    """Base for the webservice objects; logs the WhatCD session in on construction if needed."""
+    whatcd = None
+
+    def __init__(self, whatcd):
+        self.whatcd = whatcd
+        #if we are not authenticated in what.cd, do it now
+        if not self.whatcd.isAuthenticated():
+            print "authenticating..."
+            self.whatcd.headers = Authenticate(self.whatcd).getAuthenticatedHeader()
+
+    def _request(self,type, path, data, headers):
+        return Request(self.whatcd,type,path,data,headers)
+
+    def _parser(self):
+        return Parser(self.whatcd)
+
+    def utils(self):
+        return Utils()
+
+
+class Utils():
+
+    def md5(self, text):
+        """Returns the md5 hash of a string."""
+
+        h = hashlib.md5()
+        h.update(self._string(text))
+
+        return h.hexdigest()
+
+    def _unicode(self, text):
+        if type(text) == unicode:
+            return text
+
+        if type(text) == int:
+            return unicode(text)
+
+        return unicode(text, "utf-8")
+
+    def _string(self, text):
+        if type(text) == str:
+            return text
+
+        if type(text) == int:
+            return str(text)
+
+        return text.encode("utf-8")
+
+    def _number(self,string):
+        """
+            Extracts an int from a string. Returns a 0 if None or an empty string was passed
+        """
+
+        if not string:
+            return 0
+        elif string == "":
+            return 0
+        else:
+            try:
+                return int(string)
+            except ValueError:
+                return float(string)
+
+    def substituteEntity(self, match):
+        """Regex callback: replace one matched HTML entity with its unicode character."""
+        ent = match.group(2)
+        if match.group(1) == "#":
+            try:
+                # numeric reference: decimal, or hexadecimal when written &#x..;
+                return unichr(int(ent[1:], 16) if ent[:1] in "xX" else int(ent))
+            except ValueError:
+                return match.group()  # malformed or out of range: leave the text alone
+        cp = n2cp.get(ent)
+        return unichr(cp) if cp else match.group()
+
+    def decodeHTMLEntities(self, string):
+        entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
+        return entity_re.subn(self.substituteEntity, string)[0]
+
+
+
+class WhatCD(object):
+
+        def __init__(self, username, password, site, loginpage, headers):
+            """Store the credentials, site location and default request headers."""
+            #credentials
+            self.username = username
+            self.password = password
+            self.site = site
+            self.loginpage = loginpage
+            self.headers = headers
+            self.authenticateduserinfo = {}
+
+            self.cache_backend = None
+            self.proxy_enabled = False
+            self.proxy = None
+
+        def isAuthenticated(self):
+            """
+                Checks if we are authenticated in what.cd
+            """
+            if "id" in self.authenticateduserinfo:
+                return True
+            else:
+                return False
+
+        def getCredentials(self):
+            """
+                Returns an authenticated user credentials object
+            """
+            return Authenticate(self)
+
+
+        def getUser(self, username):
+            """
+                Returns an user object
+            """
+            return User(username, self)
+
+        def getTorrent(self, id, page=1):
+            """
+                Returns a torrent object
+            """
+            return Torrent(id, page, None, self)
+
+        def getTorrentGroup(self, id, page=1):
+            """
+                Returns a torrent object
+            """
+            return Torrent(id, page, True, self)
+
+        def getArtist(self, name):
+            """
+                Returns an artist object
+            """
+            return Artist(name, self)
+
+        def enableProxy(self, host, port):
+            """Enable a default web proxy"""
+            self.proxy = [host, Utils()._number(port)]
+            self.proxy_enabled = True
+
+        def disableProxy(self):
+            """Disable using the web proxy"""
+            self.proxy_enabled = False
+
+        def isProxyEnabled(self):
+            """Returns True if a web proxy is enabled."""
+            return self.proxy_enabled
+
+        def getProxy(self):
+            """Returns proxy details."""
+            return self.proxy
+
+        def enableCaching(self, file_path = None):
+            """Enables caching request-wide for all cachable calls.
+            * file_path: A file path for the backend storage file. If
+            None set, a temp file would probably be created, according the backend.
+            """
+            if not file_path:
+                file_path = tempfile.mktemp(prefix="whatapi_tmp_")
+
+            self.cache_backend = _ShelfCacheBackend(file_path)
+
+        def disableCaching(self):
+            """Disables all caching features."""
+            self.cache_backend = None
+
+        def isCachingEnabled(self):
+            """Returns True if caching is enabled."""
+
+            return not (self.cache_backend == None)
+
+        def getCacheBackend(self):
+
+            return self.cache_backend
+
+def getWhatcdNetwork(username = "", password = ""):
+    """
+    Returns a preconfigured WhatCD object for what.cd
+    # Parameters:
+        * username str: a username of a valid what.cd user
+        * password str: user's password
+    """
+
+    return WhatCD (
+                    username = username,
+                    password = password,
+                    site = "ssl.what.cd",
+                    loginpage = "/login.php",
+                    headers = {
+                        "Content-type": "application/x-www-form-urlencoded",
+                        'Accept-Charset': 'utf-8',
+                        'User-Agent': "whatapi [devilcius]"
+                        })
+
+
+
+class _ShelfCacheBackend(object):
+    """Used as a backend for caching cacheable requests."""
+    def __init__(self, file_path = None):
+        self.shelf = shelve.open(file_path)
+
+    def getHTML(self, key):
+        return self.shelf[key]
+
+    def setHTML(self, key, xml_string):
+        self.shelf[key] = xml_string
+
+    def hasKey(self, key):
+        return key in self.shelf.keys()
+
+
+class Request(object):
+    """web service operation."""
+
+    def __init__(self, whatcd,type, path, data, headers):
+        """Capture the HTTP method (type), path, body and headers of one what.cd request."""
+        self.whatcd = whatcd
+        self.utils = Utils()
+        self.type = type
+        self.path = path
+        self.data = data
+        self.headers = headers
+        #enable caching? (self.cache is only set when caching is enabled)
+        if whatcd.isCachingEnabled():
+            self.cache = whatcd.getCacheBackend()
+
+    def getCacheKey(self):
+        """The cache key is a md5 hash of request params."""
+
+        key = self.type + self.path + self.data
+        return Utils().md5(key)
+
+    def getCachedResponse(self):
+        """Returns a file object of the cached response."""
+
+        if not self.isCached():
+            response = self.downloadResponse()
+            self.cache.setHTML(self.getCacheKey(), response)
+        return self.cache.getHTML(self.getCacheKey())
+
+    def isCached(self):
+        """Returns True if the request is already in cache."""
+
+        return self.cache.hasKey(self.getCacheKey())
+
+    def downloadResponse(self):
+        """Returns a ResponseBody object from the server."""
+
+        #print "downloading from %s" % (self.path)
+        conn = httplib.HTTPSConnection(self.whatcd.site)
+        rb = ResponseBody()
+
+        if self.whatcd.isProxyEnabled():
+            conn = httplib.HTTPSConnection(host = self.whatcd.getProxy()[0], port = self.whatcd.getProxy()[1])
+            conn.request(method = self.type, url="https://" + self.whatcd.site + self.path, body = self.data, headers = self.headers)
+        else:
+            conn.request(self.type, self.path, self.data, self.headers)
+
+        response = conn.getresponse()
+        rb.headers = response.getheaders()
+        # Rip all inline JavaScript out of the response in case it hasn't been properly escaped
+        rb.body = re.sub('<script type="text/javascript">[^<]+</script>', '', response.read())
+        conn.close()
+        return rb
+
+    def execute(self, cacheable = False):
+        """Depending if caching is enabled, returns response from the server or, if available, the cached response"""
+        if self.whatcd.isCachingEnabled() and cacheable:
+            response = self.getCachedResponse()
+        else:
+            response = self.downloadResponse()
+
+        return response
+
+class Authenticate(WhatBase):
+
+    def __init__(self, whatcd):
+        """Create an authenticated user object.
+        # Parameters:
+            * whatcd object: WhatCD object.
+        """
+        self.whatcd = whatcd
+        self.parser = Parser(whatcd)
+        if not self.whatcd.isAuthenticated():
+            self.getAuthenticatedHeader()
+
+    def setCookie(self):
+        """Log in to what.cd and pickle the session headers to the 'cookie' file."""
+        print "creating cookie"
+        loginform= {'username': self.whatcd.username, 'password': self.whatcd.password \
+                , 'keeplogged': '1', 'login': 'Login'}
+        data = urllib.urlencode(loginform)
+        # a login must always hit the server: a cached reply would hand back a stale session
+        response = self._request("POST", self.whatcd.loginpage, data, self.whatcd.headers).execute()
+        try:
+            cookie=dict(response.headers)['set-cookie']
+            session=re.search("session=[^;]+", cookie).group(0)
+        except (KeyError, AttributeError):
+            self.whatcd.headers = None
+            raise Exception("Login failed, most likely bad creds or the site is down, nothing to do")
+        self.whatcd.headers["Cookie"] = session
+        # only touch the cookie file once the login succeeded, so a failure leaves nothing behind
+        f = open('cookie', 'w')
+        try:
+            pickle.dump(self.whatcd.headers, f)
+        finally:
+            f.close()
+
+    def getAuthenticatedHeader(self):
+        """Log the user in to what.cd and return the authenticated headers."""
+        if os.path.exists("cookie"):
+            f = open("cookie", "r")
+            try:
+                # NOTE(review): unpickling is only safe while 'cookie' is written by setCookie alone
+                self.whatcd.headers = pickle.load(f)
+                f.close()
+            except EOFError:
+                f.close()  # release the handle first: Windows cannot delete an open file
+                os.remove("cookie")
+                print "invalid cookie, removed"
+                self.setCookie()
+        else:
+            self.setCookie()
+        #set authenticated user info
+        if 'id' not in self.whatcd.authenticateduserinfo:
+            self.whatcd.authenticateduserinfo = self.getAuthenticatedUserInfo()
+
+        return self.whatcd.headers
+
+    def getAuthenticatedUserInfo(self, homepage = None):
+        """
+            Returns authenticated user's info
+        """
+        if not homepage:
+            homepage = BeautifulSoup(self._request("GET", "/index.php", "", self.whatcd.headers).execute(True).body)
+        authuserinfo = self._parser().authenticatedUserInfo(homepage.find("div", {"id": "userinfo"}))
+        return authuserinfo
+
+    def getAuthenticatedUserId(self):
+        """
+            Returns authenticated user's id
+        """
+        return self.whatcd.authenticateduserinfo["id"]
+
+    def getAuthenticatedUserAuthCode(self):
+        """
+            Returns authenticated user's authcode
+        """
+        return self.whatcd.authenticateduserinfo["authcode"]
+
+
+    def getAuthenticatedUserUpload(self):
+        """
+            Returns authenticated user's total uploaded data
+        """
+        return self.whatcd.authenticateduserinfo["uploaded"]
+
+
+    def getAuthenticatedUserDownload(self):
+        """
+            Returns authenticated user's total downloaded data
+        """
+        return self.whatcd.authenticateduserinfo["downloaded"]
+
+
+    def getAuthenticatedUserRatio(self):
+        """
+            Returns authenticated user's ratio
+        """
+        return self.whatcd.authenticateduserinfo["ratio"]
+
+    def getAuthenticatedUserRequiredRatio(self):
+        """
+            Returns authenticated user's required ratio
+        """
+        return self.whatcd.authenticateduserinfo["required"]
+
+
+class User(WhatBase):
+    """A What.CD user"""
+
+    def __init__(self, username, whatcd):
+        """Create an user object.
+        # Parameters:
+            * username str: The user's name.
+            - whatcd object: the what.cd network object
+        """
+        WhatBase.__init__(self, whatcd)
+        self.name = username
+        self.whatcd = whatcd
+        self.userpage = "/user.php?"
+        self.userid = None
+        self.userinfo = None
+
+    def getUserName(self):
+        """
+            Returns user's name
+        """
+        return self.name
+
+    def getUserId(self):
+        """
+            Returns user's id, None if user doesn't exist
+        """
+        if self.userid:
+            return self.userid
+        idform = {'action': "search", 'search': self.name}
+        data = urllib.urlencode(idform)
+        headers = self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).headers
+        # presumably a hit redirects to "user.php?id=<id>" (12-char prefix); no Location means no such user
+        location = dict(headers).get('location')
+        if location is None:
+            return None
+        self.userid = location[12:]
+        return self.userid
+
+    def getInfo(self):
+        """
+            Returns a dictionary of {percentile:{dataup str,
+                                                 datadown str,
+                                                 overall str,
+                                                 postmade str,
+                                                 boutyspent str,
+                                                 reqfilled str,
+                                                 artistsadded str,
+                                                 torrentsup str},
+                                     stats: {uploaded str,
+                                             ratio str,
+                                             joined str,
+                                             downloaded str,
+                                             lastseen str,
+                                             rratio str},
+                                     community: {uploaded tuple(total str, url str),
+                                                 forumposts tuple(total str, url str),
+                                                 invited tuple (total,None),
+                                                 perfectflacs tuple(total str, url str),
+                                                 contributedcollages tuple(total str, url str),
+                                                 reqvoted tuple(total str, url str),
+                                                 uniquegroups tuple(total str, url str)
+                                                 torrentscomments tuple(total str, url str),
+                                                 snatched tuple(total str, url str),
+                                                 artists str,
+                                                 reqfilled tuple(total str, url str),
+                                                 startedcollages tuple(total str, url str),
+                                                 leeching tuple(total str, url str),
+                                                 seeding tuple(total str, url str)}
+                                                }
+            If paranoia is not Off, it returns None.
+        """
+        if self.getUserId():
+            form = {'id': self.getUserId()}
+            data = urllib.urlencode(form)
+            userpage = BeautifulSoup(self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).body)
+            info = self._parser().userInfo(userpage.find("div", {"class": "sidebar"}), self.name)
+            self.userinfo = info
+            return info
+        else:
+            print "no user id retrieved"
+            return None
+
+
+    def getTorrentsSeeding(self, page=1):
+        """
+            Returns the parsed listing of the torrents this user is currently seeding.
+            Entries are described upstream as dictionaries {page (current, total), tag, dlurl, id,
+            artist, album, release type, scene, year, artistid}; the parser's torrentsList() builds them.
+        """
+        if self.userid is None:
+            self.userid = self.getUserId()
+        query = "/torrents.php?type=seeding&userid=%s&page=%d" % (self.userid, page)
+        reply = self._request("GET", query, "", self.whatcd.headers).execute(True)
+        listing = BeautifulSoup(reply.body)
+        return self._parser().torrentsList(listing)
+
+    def getTorrentsSnatched(self,page=1):
+        """
+            Returns the parsed listing of the torrents this user has snatched.
+            Entries are described upstream as dictionaries {page (current, total), tag, dlurl, id,
+            artist, album, release type, scene, year, artistid}; the parser's torrentsList() builds them.
+        """
+        if self.userid is None:
+            self.userid = self.getUserId()
+        query = "/torrents.php?type=snatched&userid=%s&page=%d" % (self.userid, page)
+        reply = self._request("GET", query, "", self.whatcd.headers).execute(True)
+        listing = BeautifulSoup(reply.body)
+        return self._parser().torrentsList(listing)
+
+    def getTorrentsUploaded(self, page=1):
+        """
+            Returns the parsed listing of the torrents this user has uploaded.
+            Entries are described upstream as dictionaries {page (current, total), tag, dlurl, id,
+            artist, album, release type, scene, year, artistid}; the parser's torrentsList() builds them.
+        """
+        if self.userid is None:
+            self.userid = self.getUserId()
+        query = "/torrents.php?type=uploaded&userid=%s&page=%d" % (self.userid, page)
+        reply = self._request("GET", query, "", self.whatcd.headers).execute(True)
+        listing = BeautifulSoup(reply.body)
+        return self._parser().torrentsList(listing)
+
+
+    def getTorrentsCommented(self, page=1):
+        """
+            Returns the parsed list of the user's torrent comments for `page`, or None when the comments link is unknown
+        """
+        if self.userid is None:
+            self.userid = self.getUserId()
+        comments = getattr(self.specificUserInfo(), 'torrentscomments', None)
+        if not comments:  # missing/None when no user info was parsed (e.g. paranoia is on)
+            return None
+        url = "/%s&page=%d" % (comments[1], page)
+        torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body)
+        return self._parser().postsList(torrentspage)
+
+
+
+    ###############################################
+    #              specific values                #
+    ###############################################
+
+
+    def specificUserInfo(self):
+        """
+            Returns the user's info flattened into attributes of a SpecificInformation object.
+            Every attribute is left to None when no info is available (e.g. user's paranoia is on).
+        """
+        info = SpecificInformation()
+        # Initialize attributes. 'torrentscomments' is the name getTorrentsCommented reads;
+        # 'torrentscommentscom' is the name that used to be initialized, kept as an alias.
+        for attr in ('joindate', 'lastseen', 'dataup', 'datadown', 'ratio', 'rratio',
+                     'uppercentile', 'downpercentile', 'torrentsuppercentile',
+                     'reqfilledpercentile', 'bountyspentpercentile', 'postsmadepercentile',
+                     'artistsaddedpercentile', 'overallpercentile', 'postsmadecom',
+                     'torrentscomments', 'torrentscommentscom', 'collagesstartedcom',
+                     'collagescontrcon', 'reqfilledcom', 'reqvotedcom', 'uploadedcom',
+                     'uniquecom', 'perfectcom', 'seedingcom', 'leechingcom', 'snatchedcom',
+                     'invitedcom', 'artistsaddedcom'):
+            setattr(info, attr, None)
+
+        # getInfo() stores what it parsed in self.userinfo, so it only runs when nothing is cached
+        if not self.userinfo and self.getInfo() is None:
+            return info
+
+        ######## stats ###########
+        info.joindate = self.userinfo['stats']['joined']
+        info.lastseen = self.userinfo['stats']['lastseen']
+        info.dataup = self.userinfo['stats']['uploaded']
+        info.datadown = self.userinfo['stats']['downloaded']
+        info.ratio = self.userinfo['stats']['ratio']
+        info.rratio = self.userinfo['stats']['rratio']
+        ######## percentile ###########
+        info.uppercentile = self.userinfo['percentile']['dataup']
+        info.downpercentile = self.userinfo['percentile']['datadown']
+        info.torrentsuppercentile = self.userinfo['percentile']['torrentsup']
+        info.reqfilledpercentile = self.userinfo['percentile']['reqfilled']
+        info.bountyspentpercentile = self.userinfo['percentile']['bountyspent']
+        info.postsmadepercentile = self.userinfo['percentile']['postsmade']
+        info.artistsaddedpercentile = self.userinfo['percentile']['artistsadded']
+        info.overallpercentile = self.userinfo['percentile']['overall']
+        ######## community ###########
+        info.postsmadecom = self.userinfo['community']['forumposts']
+        info.torrentscomments = self.userinfo['community']['torrentscomments']
+        info.torrentscommentscom = info.torrentscomments
+        info.collagesstartedcom = self.userinfo['community']['startedcollages']
+        info.collagescontrcon = self.userinfo['community']['contributedcollages']
+        info.reqfilledcom = self.userinfo['community']['reqfilled']
+        info.reqvotedcom = self.userinfo['community']['reqvoted']
+        info.uploadedcom = self.userinfo['community']['uploaded']
+        info.uniquecom = self.userinfo['community']['uniquegroups']
+        info.perfectcom = self.userinfo['community']['pefectflacs']
+        info.seedingcom = self.userinfo['community']['seeding']
+        info.leechingcom = self.userinfo['community']['leeching']
+        info.snatchedcom = self.userinfo['community']['snatched']
+        info.invitedcom = self.userinfo['community']['invited'][0]
+        info.artistsaddedcom = self.userinfo['community']['artists']
+        return info
+
+
+class Torrent(WhatBase):
+    """A What.CD torrent"""
+
+    def __init__(self, id, page, isparent, whatcd):
+        """Create a torrent object and load its info from the site.
+        # Parameters:
+            * id str: The torrent's id (a group id when isparent is true).
+            * page: The torrent page's number
+            * isparent: true when id designates a torrent group rather than a single torrent
+            * whatcd object: the WhatCD network object
+        """
+        WhatBase.__init__(self, whatcd)
+        self.id = id
+        self.page = page
+        self.whatcd = whatcd
+        self.isParent = isparent
+        self.torrentpage = "/torrents.php?"
+        self.torrentinfo = self.getInfo()
+
+    def getTorrentUrl(self):
+        """
+            Returns the torrent page's URL: built locally for a parent (group) id, otherwise
+            taken from the 'location' header of the site's reply (None if there's no such header)
+        """
+        key = 'id' if self.isParent else 'torrentid'
+        data = urllib.urlencode({key: self.id, 'page': self.page})
+        if self.isParent:
+            return self.torrentpage + data
+        headers = self._request("GET", self.torrentpage + data, "", self.whatcd.headers).execute(True).headers
+        # dict(headers) is never None; a missing header is what has to be detected
+        return dict(headers).get('location')
+
+
+    def getInfo(self):
+        """
+            Returns a dictionnary with torrents's info, None if the torrent can't be found
+        """
+        url = self.getTorrentUrl()
+        if url is None:
+            print "no torrent retrieved with such id"
+            return None
+        torrentpage = BeautifulSoup(self._request("GET", "/"+url, "", self.whatcd.headers).execute(True).body)
+
+        if 'Site log' in torrentpage.find("title").string:
+            print "no torrent retrieved with such id"
+            return None
+        else:
+            return self._parser().torrentInfo(torrentpage, self.id, self.isParent)
+
+
+    def getTorrentParentId(self):
+        """
+            Returns torrent's group id
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['parentid']
+
+    def getTorrentDownloadURL(self):
+        """
+            Returns relative url to download the torrent
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['downloadurl']
+
+    def getTorrentDetails(self):
+        """
+            Returns torrent's details (format / bitrate)
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['details']
+
+    def getTorrentEditionInfo(self):
+        """
+            Returns torrent's edition info (Edition information / media type)
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['editioninfo']
+
+    def getTorrentMediaType(self):
+        """
+            Returns torrent's media type
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['rlsmedia']
+
+    def getTorrentSize(self):
+        """
+            Returns torrent's size
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['size']
+
+
+    def getTorrentSnatched(self):
+        """
+            Returns torrent's total snatches
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['snatched']
+
+
+    def getTorrentSeeders(self):
+        """
+            Returns torrent's current seeders
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['seeders']
+
+    def getTorrentLeechers(self):
+        """
+            Returns torrent's current leechers
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['leechers']
+
+    def getTorrentUploadedBy(self):
+        """
+            Returns torrent's uploader
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['uploadedby']
+
+    def getTorrentFolderName(self):
+        """
+            Returns torrent's folder name
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['foldername']
+
+    def getTorrentFileList(self):
+        """
+            Returns torrent's file list
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['filelist']
+
+
+    def getTorrentReleaseType(self):
+        """
+            Returns torrent's release type
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['rlstype']
+
+    def getTorrentDescription(self):
+        """
+            Returns torrent's description / empty string is there's none
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['torrentdescription']
+
+    def getTorrentComments(self):
+        """
+            Returns a list of dictionnaries with each comment in the torrent page
+            {postid,post,userid,username}
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['comments']
+
+    def getTorrentCommentsPagesNumber(self):
+        """
+            Returns number of pages of comments in the torrent
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['commentspages']
+
+    def isTorrentFreeLeech(self):
+        """
+            Returns True if torrent is freeleeech, False if not
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['isfreeleech']
+
+    def isTorrentReported(self):
+        """
+            Returns True if torrent is reported, False if not
+        """
+        if self.torrentinfo:
+            return self.torrentinfo['torrent']['isreported']
+
+
+class Artist(WhatBase):
+    """A What.CD artist"""
+
+    def __init__(self, name, whatcd):
+        """Create an artist object and load its info from the site.
+        # Parameters:
+            * name str: The artist's name.
+            * whatcd object: The WhatCD network object
+        """
+        WhatBase.__init__(self, whatcd)
+        self.name = name
+        self.whatcd = whatcd
+        self.artistpage = "/artist.php"
+        self.utils = Utils()
+        self.info = self.getInfo()
+
+
+    def getArtistName(self):
+        """
+            Returns artist's name
+        """
+        return self.name
+
+    def getArtistId(self):
+        """
+            Returns artist's id, None if artist's not found
+        """
+        data = urllib.urlencode({'artistname': self.name})
+        headers = self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).headers
+        location = dict(headers).get('location', '')  # '' when the reply has no such header
+        if location[0:14] != 'artist.php?id=':
+            return None
+        else:
+            return location[14:]
+
+    def getInfo(self):
+        """
+            Returns artist's info, None if there isn't
+        """
+        artistid = self.getArtistId()  # looked up once: every call is a request to the site
+        if artistid:
+            data = urllib.urlencode({'id': artistid})
+            artistpage = BeautifulSoup(self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).body)
+            return self._parser().artistInfo(artistpage)
+        else:
+            print "no artist info retrieved"
+            return None
+
+    def getArtistReleases(self):
+        """
+            Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id} (None if no info)
+        """
+        return self.info['releases'] if self.info else None
+
+    def getArtistImage(self):
+        """
+            Return the artist image URL, None if there's no image
+        """
+        return self.info['image'] if self.info else None
+
+    def getArtistInfo(self):
+        """
+            Return the artist's info, blank string if none (None if the artist's page wasn't retrieved)
+        """
+        return self.info['info'] if self.info else None
+
+    def getArtistTags(self):
+        """
+            Return a list with artist's tags (None if the artist's page wasn't retrieved)
+        """
+        return self.info['tags'] if self.info else None
+
+    def getArtistSimilar(self):
+        """
+            Return a list with artist's similar artists (None if the artist's page wasn't retrieved)
+        """
+        return self.info['similarartists'] if self.info else None
+
+    def getArtistRequests(self):
+        """
+            Returns a list with all artist's requests in form of dictionary {requestname, id} (None if no info)
+        """
+        return self.info['requests'] if self.info else None
+
+    def setArtistInfo(self, id, info):
+        """
+            Updates what.cd artist's info and image
+            Returns 1 if artist info updated succesfully, 0 if not (a message string if info[0] is empty).
+        # Parameters:
+            * id str: what.cd artist's id
+            * info tuple: (The artist's info -str-, image url -str- (None if there isn't))
+        """
+        if info[0]:
+            params = {'action': 'edit','artistid':id}
+            data = urllib.urlencode(params)
+
+            edit_page = BeautifulSoup(self._request("GET", self.artistpage +"?"+ data, "", self.whatcd.headers).execute(True).body)
+            what_form = self._parser().whatForm(edit_page,'edit')
+            if info[1]:
+                image_to_post = info[1]
+            else:
+                image_to_post = what_form['image']
+            data_to_post = {'body': info[0].encode('utf-8'),
+                            'summary':'automated artist info insertion',
+                            'image':image_to_post,
+                            'artistid':what_form['artistid'],
+                            'auth':what_form['auth'],
+                            'action':what_form['action']}
+
+            #post artist's info (note: the Content-type header stays set on the shared whatcd.headers)
+            self.whatcd.headers['Content-type']="application/x-www-form-urlencoded"
+            response = self._request("POST", self.artistpage, urllib.urlencode(data_to_post), self.whatcd.headers).execute(False)
+            artist_id_returned = dict(response.headers).get('location', '')[14:]
+
+            if str(artist_id_returned) == str(what_form['artistid']) :
+                return 1
+            else:
+                return 0
+
+        else:
+            # nothing to post: the caller gets a message instead of 1/0
+            return 'no artist info provided. Aborting.'
+
+
+class Parser(object):
+
+        def __init__(self,whatcd):
+            """Keep the WhatCD network object and a Utils helper; totalpages starts at 0."""
+            self.utils, self.whatcd = Utils(), whatcd
+            self.totalpages = 0
+
+        def authenticatedUserInfo(self, dom):
+            """
+                Parse the index page and returns a dictionnary with basic authenticated user information
+            """
+            userInfo = {}
+            soup = BeautifulSoup(str(dom))
+            for ul in soup.fetch('ul'):
+                ulid = ul.get("id")  # None (instead of a KeyError) for <ul> tags without an id
+                items = ul.findAll('li')
+                if ulid == "userinfo_username":
+                    #retrieve user logged id
+                    hrefid = items[0].find("a")["href"]
+                    matchid = re.search('[0-9]+', hrefid)
+                    if matchid is None:
+                        self.debugMessage("not found  href to retrieve user id")
+                    else:
+                        userInfo["id"] = matchid.group(0)
+
+                    #retrieve user auth code (hexadecimal value following '=' in the third link)
+                    hrefauth = items[2].find("a")["href"]
+                    matchauth = re.search('=[0-9a-fA-F]+', hrefauth)
+                    if matchauth is None:
+                        self.debugMessage("not found  href to retrieve user auth code")
+                    else:
+                        userInfo["authcode"] = matchauth.group(0)[1:]
+                elif ulid == "userinfo_stats" and len(items) > 0:
+                    userInfo["uploaded"] = items[0].find("span").string
+                    userInfo["downloaded"] = items[1].find("span").string
+                    userInfo["ratio"] = items[2].findAll("span")[1].string
+                    userInfo["required"] = items[3].find("span").string
+                    userInfo["authenticate"] = True
+
+            return userInfo
+
+	def userInfo(self, dom, user):
+            """
+                Parse an user's page and returns a dictionnary {'stats', 'percentile', 'community'} with its
+                information, or None when the "Personal" box shows a paranoia level other than "Off".
+            # Parameters:
+                * dom: user page html (converted with str() before being parsed)
+                * user str: what.cd username (not referenced by the parsing below)
+            """
+            userInfo = {'stats':{}, 'percentile':{}, 'community':{}}
+            soup = BeautifulSoup(str(dom))
+
+            for div in soup.fetch('div',{'class':'box'}):
+
+                #if paranoia is not set to 'Off', stop collecting data
+                if div.findAll('div')[0].string == "Personal":
+                    if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off":
+                        return None
+            # the stats / percentile / community boxes are picked by their position among the 'box' divs
+            statscontainer = soup.findAll('div', {'class':'box'})[1]
+            percentilecontainer = soup.findAll('div', {'class':'box'})[2]
+            communitycontainer = soup.findAll('div', {'class':'box'})[4]
+            # each value is cut out of the <li> text with a fixed offset, presumably the length of its label
+
+            userInfo['stats']['joined'] = statscontainer.findAll('li')[0].find('span')['title']
+            userInfo['stats']['lastseen'] = statscontainer.findAll('li')[1].find('span')['title']
+            userInfo['stats']['uploaded'] = statscontainer.findAll('li')[2].string[10:]
+            userInfo['stats']['downloaded'] = statscontainer.findAll('li')[3].string[12:]
+            userInfo['stats']['ratio'] = statscontainer.findAll('li')[4].find('span').string
+            userInfo['stats']['rratio'] = statscontainer.findAll('li')[5].string[16:]
+            userInfo['percentile']['dataup'] = percentilecontainer.findAll('li')[0].string[15:]
+            userInfo['percentile']['datadown'] = percentilecontainer.findAll('li')[1].string[17:]
+            userInfo['percentile']['torrentsup'] = percentilecontainer.findAll('li')[2].string[19:]
+            userInfo['percentile']['reqfilled'] = percentilecontainer.findAll('li')[3].string[17:]
+            userInfo['percentile']['bountyspent'] = percentilecontainer.findAll('li')[4].string[14:]
+            userInfo['percentile']['postsmade'] = percentilecontainer.findAll('li')[5].string[12:]
+            userInfo['percentile']['artistsadded'] = percentilecontainer.findAll('li')[6].string[15:]
+            userInfo['percentile']['overall'] = percentilecontainer.findAll('li')[7].find('strong').string[14:]
+            # community entries are (value, link href) tuples; the slice drops the leading label and a short tail
+            userInfo['community']['forumposts'] = (communitycontainer.findAll('li')[0].contents[0].string[13:len(communitycontainer.findAll('li')[0].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[0].find('a')['href'])
+            userInfo['community']['torrentscomments'] = (communitycontainer.findAll('li')[1].contents[0].string[18:len(communitycontainer.findAll('li')[1].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[1].find('a')['href'])
+            userInfo['community']['startedcollages'] = (communitycontainer.findAll('li')[2].contents[0].string[18:len(communitycontainer.findAll('li')[2].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[2].find('a')['href'])
+            userInfo['community']['contributedcollages'] = (communitycontainer.findAll('li')[3].contents[0].string[25:len(communitycontainer.findAll('li')[3].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[3].find('a')['href'])
+            userInfo['community']['reqfilled'] = (communitycontainer.findAll('li')[4].contents[0].string[17:len(communitycontainer.findAll('li')[4].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[4].find('a')['href'])
+            userInfo['community']['reqvoted'] = (communitycontainer.findAll('li')[5].contents[0].string[16:len(communitycontainer.findAll('li')[5].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[5].find('a')['href'])
+            userInfo['community']['uploaded'] = (communitycontainer.findAll('li')[6].contents[0].string[10:len(communitycontainer.findAll('li')[6].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[6].find('a')['href'])
+            userInfo['community']['uniquegroups'] = (communitycontainer.findAll('li')[7].contents[0].string[15:len(communitycontainer.findAll('li')[7].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[7].find('a')['href'])
+            userInfo['community']['pefectflacs'] = (communitycontainer.findAll('li')[8].contents[0].string[16:len(communitycontainer.findAll('li')[8].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[8].find('a')['href'])
+            userInfo['community']['seeding'] = (communitycontainer.findAll('li')[9].contents[0].string[9:len(communitycontainer.findAll('li')[9].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[9].find('a')['href'])
+            userInfo['community']['leeching'] = (communitycontainer.findAll('li')[10].contents[0].string[10:len(communitycontainer.findAll('li')[10].contents[0].string)-2],\
+                                                        communitycontainer.findAll('li')[10].find('a')['href'])
+            #NB: there's a carriage return and white spaces inside the snatched li tag
+            userInfo['community']['snatched'] = (communitycontainer.findAll('li')[11].contents[0].string[10:len(communitycontainer.findAll('li')[11].contents[0].string)-7],\
+                                                        communitycontainer.findAll('li')[11].find('a')['href'])
+            userInfo['community']['invited'] = (communitycontainer.findAll('li')[12].contents[0].string[9:],\
+                                                        None)
+            userInfo['community']['artists'] = percentilecontainer.findAll('li')[6]['title']
+            # NOTE: the 'pefectflacs' key above is misspelled; specificUserInfo reads it under the same spelling
+            return userInfo
+
+        def torrentInfo(self, dom, id, isparent):
+            """
+                Parse a torrent's page and returns a dictionnary {'torrent': {...}} with its information
+            """
+
+            torrentInfo = {'torrent':{}}
+            torrentfiles = []
+            torrentdescription = ""
+            isreported = False
+            isfreeleech = False
+            soup = BeautifulSoup(str(dom))
+            if isparent:
+                torrentInfo['torrent']['parentid'] = id
+            else:
+                groupidurl = soup.findAll('div', {'class':'linkbox'})[0].find('a')['href']
+                torrentInfo['torrent']['editioninfo'] = soup.findAll('td', {'class':'edition_info'})[0].find('strong').contents[-1]
+                regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray')
+                torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0)
+                torrentInfo['torrent']['parentid'] = groupidurl[groupidurl.rfind("=")+1:]
+                # the row with this torrent's links and counters is looked up once and reused below
+                torrentrow = soup.findAll('tr',{'id':'torrent%s'%id})[0]
+                torrentInfo['torrent']['downloadurl'] = torrentrow.findAll('a',{'title':'Download'})[0]['href']
+                ## is freeleech or/and reported? ##
+                lastlink = torrentrow.findAll('a')[-1].contents
+                #both
+                if len(lastlink) == 4:
+                    isreported = True
+                    isfreeleech = True
+                #either
+                elif len(lastlink) == 2:
+                    if lastlink[1].string == 'Reported':
+                        isreported = True
+                    elif lastlink[1].string == 'Freeleech!':
+                        isfreeleech = True
+                #none: both flags keep their False default; details is the first node in every case
+                torrentInfo['torrent']['details'] = lastlink[0]
+                torrentInfo['torrent']['isfreeleech'] = isfreeleech
+                torrentInfo['torrent']['isreported'] = isreported
+                torrentInfo['torrent']['size'] = torrentrow.findAll('td')[1].string
+                torrentInfo['torrent']['snatched'] = torrentrow.findAll('td')[2].string
+                torrentInfo['torrent']['seeders'] = torrentrow.findAll('td')[3].string
+                torrentInfo['torrent']['leechers'] = torrentrow.findAll('td')[4].string
+                detailrow = soup.findAll('tr',{'id':'torrent_%s'%id})[0]
+                torrentInfo['torrent']['uploadedby'] = detailrow.findAll('a')[0].string
+                filesbox = soup.findAll('div',{'id':'files_%s'%id})[0]
+                foldername = filesbox.findAll('div')[1].string
+                if(foldername is None):
+                    torrentInfo['torrent']['foldername'] = None
+                else:
+                    torrentInfo['torrent']['foldername'] = self.utils.decodeHTMLEntities(foldername)
+                files = filesbox.findAll('tr')
+                for filerow in files[1:-1]:
+                    torrentfiles.append(self.utils.decodeHTMLEntities(filerow.contents[0].string))
+                torrentInfo['torrent']['filelist'] = torrentfiles
+                #is there any description?
+                if len(detailrow.findAll('blockquote')) > 1:
+                    description = torrentInfo['torrent']['description'] = detailrow.findAll('blockquote')[1].contents
+                    for content in description:
+                        if content.string:
+                            torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string))
+                torrentInfo['torrent']['torrentdescription'] = torrentdescription
+                regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown')
+                torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0)
+
+            torrentInfo['torrent']['comments'] = []
+            torrentInfo['torrent']['commentspages'] = 0
+
+            if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0:
+                linkbox = dom.findAll("div", {"class": "linkbox"})[-1]
+                pages = 1
+                postid = ''
+                userid = ''
+                post = ''
+                # if there's more than 1 page of comments
+                if linkbox.find("a"):
+                    # by default torrent page show last page of comments
+                    lastpage = linkbox.findAll("a")[-1]['href']
+                    pages = int(lastpage[18:lastpage.find('&')]) +1
+                for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}):
+                    postid = comment.find("a",{"class":"post_id"}).string[1:]
+                    userid = comment.findAll("a")[1]['href'][12:]
+                    username = comment.findAll("a")[1].string
+                    post = comment.find("div", {"id":"content"+postid})
+                    post = u''.join([text.string for text in post.findAll(text=True)])
+                    torrentInfo['torrent']['comments'].append({"postid":postid,"post":post,"userid":userid,"username":username})
+
+                torrentInfo['torrent']['commentspages'] = pages
+
+            return torrentInfo
+
+        def artistInfo(self, dom):
+            """
+                Parse an artist's page and return a dictionary with its information.
+                # Parameters:
+                    * dom: the artist page; it is re-parsed from str(dom).
+                # Returns:
+                    a dict with the keys 'releases', 'image', 'info', 'tags',
+                    'similarartists' and 'requests'. Sections missing from the page
+                    come back empty (None for 'image') instead of raising.
+            """
+            soup = BeautifulSoup(str(dom))
+            releases = []
+            tables = soup.fetch('table', {'class': 'torrent_table'})
+            if not tables:
+                tables = soup.fetch('table', {'class': 'torrent_table grouped release_table'})
+            for table in tables:
+                strongs = table.findAll('strong')
+                if not strongs:
+                    continue
+                # the first <strong> names the release type; the last one is skipped too
+                releasetypename = strongs[0].string
+                for release in strongs[1:-1]:
+                    # skip release edition info and Freeleech! <strong>s
+                    if len(release.parent.contents) > 1 and len(release.contents) > 1:
+                        link = release.contents[1]
+                        href = link['href']
+                        releases.append({'releasetype': releasetypename,
+                                         'year': release.contents[0][0:4],
+                                         'name': self.utils.decodeHTMLEntities(link.string),
+                                         'id': href[href.rfind('=') + 1:]})
+            # artist image, if the first 'box' div contains one
+            box = soup.find('div', {'class': 'box'})
+            img = box.find('img') if box is not None else None
+            image = img['src'] if img is not None else None
+            # artist description: concatenate every child of the 'body' div that has a string
+            infoartist = ""
+            body = soup.find('div', {'class': 'body'})
+            for content in (body.contents if body is not None else []):
+                if content.string:
+                    infoartist = "%s%s" % (infoartist, self.utils._string(content.string))
+            # tags come from the first 'stats nobullet' list, similar artists from the third
+            stats = soup.findAll('ul', {'class': 'stats nobullet'})
+            tagsartist = []
+            for li in (stats[0].findAll('li') if len(stats) > 0 else []):
+                if li.contents and li.contents[0].string:
+                    tagsartist.append(self.utils._string(li.contents[0].string))
+            similarartists = []
+            for artist in (stats[2].findAll('span', {'title': '2'}) if len(stats) > 2 else []):
+                if artist.contents and artist.contents[0].string:
+                    similarartists.append(self.utils._string(artist.contents[0].string))
+            # open requests listed on the page, if any
+            requests = []
+            reqtable = soup.find('table', {'id': 'requests'})
+            if reqtable is not None:
+                for row in reqtable.findAll('tr', {'class': re.compile('row')}):
+                    link = row.findAll('a')[1]
+                    requests.append({'requestname': link.string, 'id': link['href'][28:]})
+
+            return {'releases': releases, 'image': image,
+                    'info': self.utils.decodeHTMLEntities(infoartist), 'tags': tagsartist,
+                    'similarartists': similarartists, 'requests': requests}
+
+        def torrentsList(self, dom):
+            """
+                Parse a torrents list page and return a list with one dictionary per music torrent.
+                # Parameters:
+                    * dom: the parsed list page.
+                # Returns:
+                    a list of dicts with the keys 'tag', 'dlurl', 'id', 'artist', 'artistid', 'album',
+                    'year', 'pages' and 'scene'. 'artist' and 'artistid' are tuples, 'pages' is an int
+                    and 'year' is None when no [YYYY] marker follows the album link. The list is empty
+                    when the page holds no torrent table.
+                # Side effects:
+                    stores the page count in self.totalpages when the navigation bar ends with a link,
+                    and reads self.totalpages back when it does not.
+            """
+            torrentslist = []
+            torrentssoup = dom.find("table", {"width": "100%"})
+
+            #nothing to parse when the page holds no torrent table
+            if not torrentssoup:
+                return torrentslist
+
+            pages = 1
+            regyear = re.compile(r'\[\d{4}\]')
+            performedby = re.compile('performed by')
+            navsoup = dom.find("div", {"class": "linkbox"})
+            #is there a page navigation bar?
+            if navsoup is not None and navsoup.contents:
+                #if there's more than 1 page of torrents
+                if navsoup.contents[-1].has_key('href'):
+                    lastpage = navsoup.contents[-1]['href']
+                    #keep the count numeric: it gets incremented on the last page
+                    pages = int(lastpage[18:lastpage.find('&')])
+                    self.totalpages = pages
+                else: #we are at the last page, no href
+                    #int() also accepts a count that was stored as a string
+                    pages = int(self.totalpages) + 1
+
+            #fetch all tr except first one (column head)
+            for torrent in torrentssoup.fetch('tr')[1:]:
+                #exclude non music torrents
+                if torrent.find('td').find('div')['class'][0:10] != 'cats_music':
+                    continue
+
+                torrenttd = torrent.findAll('td')[1]
+                torrenttag = torrent.find('td').contents[1]['title']
+                actions = torrenttd.find('span').findAll('a')
+                torrentdl = actions[0]['href']
+                torrentrm = actions[1]['href']
+                torrentid = torrentrm[torrentrm.rfind('=')+1:]
+
+                # remove dataless elements
+                torrenttags = torrenttd.div
+                rightlinks = torrenttd.span
+                torrenttags.extract()
+                rightlinks.extract()
+
+                # remove line breaks
+                torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")])
+                torrenttd = BeautifulSoup(torrenttd)
+                links = torrenttd.findAll('a')
+
+                #the album link is always the last one; artist links come before it
+                if len(links) == 1:
+                    #various artists
+                    artistlinks = []
+                elif len(links) in (2, 3):
+                    #one or two artists
+                    artistlinks = links[:-1]
+                elif len(links) > 3 and torrenttd.find(text=performedby):
+                    #performed by
+                    artistlinks = links[-2:-1]
+                else:
+                    #unknown layout: skip the row rather than reuse stale values
+                    continue
+                torrentartist = tuple([self.utils.decodeHTMLEntities(link.string) for link in artistlinks])
+                artistid = tuple([link['href'][14:] for link in artistlinks])
+                if not artistlinks:
+                    torrentartist = ('Various Artists',)
+
+                #the text following the album link carries the year and the Scene flag
+                sibling = links[-1].nextSibling
+                info = ""
+                if sibling is not None and sibling.string:
+                    info = sibling.string.strip()
+                yearmatch = regyear.search(info)
+                torrentslist.append({'tag': torrenttag, 'dlurl': torrentdl, 'id': torrentid,
+                                     'artist': torrentartist, 'artistid': artistid,
+                                     'album': self.utils.decodeHTMLEntities(links[-1].string),
+                                     'year': yearmatch.group(0)[1:5] if yearmatch else None,
+                                     'pages': pages, 'scene': 'Scene' in info})
+
+            return torrentslist
+
+        def postsList(self, dom):
+            """
+                Parse a posts list page and return a list with one dictionary per post:
+                {postid, torrentid, comment, postdate, pages}
+                # Parameters:
+                    * dom: the parsed posts page.
+                # Side effects:
+                    stores the page count (an int) in self.totalpages when it is read from a link.
+            """
+            postslist = []
+            postssoup = dom.find("div", {"class": "thin"})
+
+            #if there's at least 1 post in the list
+            if postssoup:
+                navsoup = dom.find("div", {"class": "linkbox"})
+                navlinks = navsoup.findAll("a") if navsoup is not None else []
+                #the navigation bar has links: read the page count from the second one
+                if len(navlinks) > 1:
+                    lastpage = navlinks[1]['href']
+                    pages = int(lastpage[18:lastpage.find('&')])
+                    self.totalpages = pages
+                else: #no usable navigation link: report a single page
+                    pages = 1
+
+                for post in postssoup.fetch('table', {'class':'forum_post box vertical_margin'}):
+                    headlinks = post.find("span").findAll("a")
+                    torrenthref = headlinks[-1]['href']
+                    textnodes = post.find("td", {"class":"body"}).findAll(text=True)
+                    postslist.append({'postid': headlinks[0].string[1:],
+                                      'torrentid': torrenthref[torrenthref.rfind('=')+1:],
+                                      'comment': u''.join([node.string for node in textnodes]),
+                                      'postdate': post.find("span", {"class":"time"})['title'],
+                                      'pages': pages})
+
+            return postslist
+
+
+        def whatForm(self, dom, action):
+            """
+                Collect the fields of the what.cd form identified by its 'action' input.
+                # Parameters:
+                    * dom: the parsed edit page.
+                    * action str: the value of the 'action' input of the wanted form.
+                # Returns a dict mapping each input/textarea name to its value (textarea: its string).
+            """
+            marker = dom.find('input', {'name': 'action', 'value': action})
+            fields = marker.parent.fetch(('input', 'textarea'))
+            values = {}
+            # the last element is left out: it is taken to be the submit input
+            for field in fields[:-1]:
+                if field.name == 'textarea':
+                    content = field.string
+                else:
+                    content = field.get('value', None)
+                values[field.get('name', None)] = content
+            return values
+
+
+
+if __name__ == "__main__":
+    print("Module to manage what.cd as a web service")