From cb22e917cdf93789cf3cb1111a801a07f6e72411 Mon Sep 17 00:00:00 2001 From: Aaron Cohen Date: Tue, 11 Sep 2012 00:13:12 -0700 Subject: [PATCH] What.cd WIP, now using api rather than RSS --- headphones/__init__.py | 14 +- headphones/searcher.py | 94 +-- headphones/webserve.py | 20 +- lib/whatapi.py | 1442 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1507 insertions(+), 63 deletions(-) create mode 100755 lib/whatapi.py diff --git a/headphones/__init__.py b/headphones/__init__.py index 761e57db..bf9738b9 100644 --- a/headphones/__init__.py +++ b/headphones/__init__.py @@ -155,8 +155,8 @@ WAFFLES = None WAFFLES_UID = None WAFFLES_PASSKEY = None WHATCD = None -WHATCD_UID = None -WHATCD_PASSKEY = None +WHATCD_USERNAME = None +WHATCD_PASSWORD = None DOWNLOAD_TORRENT_DIR = None INTERFACE = None @@ -251,7 +251,7 @@ def initialize(): LOSSLESS_DESTINATION_DIR, PREFERRED_QUALITY, PREFERRED_BITRATE, DETECT_BITRATE, ADD_ARTISTS, CORRECT_METADATA, MOVE_FILES, \ RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, \ ADD_ALBUM_ART, EMBED_ALBUM_ART, EMBED_LYRICS, DOWNLOAD_DIR, BLACKHOLE, BLACKHOLE_DIR, USENET_RETENTION, SEARCH_INTERVAL, \ - TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, WHATCD, WHATCD_UID, WHATCD_PASSKEY, DOWNLOAD_TORRENT_DIR, \ + TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, WHATCD, WHATCD_USERNAME, WHATCD_PASSWORD, DOWNLOAD_TORRENT_DIR, \ LIBRARYSCAN_INTERVAL, DOWNLOAD_SCAN_INTERVAL, SAB_HOST, SAB_USERNAME, SAB_PASSWORD, SAB_APIKEY, SAB_CATEGORY, \ NZBMATRIX, NZBMATRIX_USERNAME, NZBMATRIX_APIKEY, NEWZNAB, NEWZNAB_HOST, NEWZNAB_APIKEY, NEWZNAB_ENABLED, EXTRA_NEWZNABS,\ NZBSORG, NZBSORG_UID, NZBSORG_HASH, NEWZBIN, NEWZBIN_UID, NEWZBIN_PASSWORD, LASTFM_USERNAME, INTERFACE, FOLDER_PERMISSIONS, \ @@ -348,8 +348,8 @@ def initialize(): WAFFLES_PASSKEY = check_setting_str(CFG, 'Waffles', 
'waffles_passkey', '') WHATCD = bool(check_setting_int(CFG, 'What.cd', 'whatcd', 0)) - WHATCD_UID = check_setting_str(CFG, 'What.cd', 'whatcd_uid', '') - WHATCD_PASSKEY = check_setting_str(CFG, 'What.cd', 'whatcd_passkey', '') + WHATCD_USERNAME = check_setting_str(CFG, 'What.cd', 'whatcd_username', '') + WHATCD_PASSWORD = check_setting_str(CFG, 'What.cd', 'whatcd_password', '') SAB_HOST = check_setting_str(CFG, 'SABnzbd', 'sab_host', '') SAB_USERNAME = check_setting_str(CFG, 'SABnzbd', 'sab_username', '') @@ -631,8 +631,8 @@ def config_write(): new_config['What.cd'] = {} new_config['What.cd']['whatcd'] = int(WHATCD) - new_config['What.cd']['whatcd_uid'] = WHATCD_UID - new_config['What.cd']['whatcd_passkey'] = WHATCD_PASSKEY + new_config['What.cd']['whatcd_username'] = WHATCD_USERNAME + new_config['What.cd']['whatcd_password'] = WHATCD_PASSWORD new_config['General']['search_interval'] = SEARCH_INTERVAL new_config['General']['libraryscan_interval'] = LIBRARYSCAN_INTERVAL diff --git a/headphones/searcher.py b/headphones/searcher.py index ec5c86ea..d54d6d76 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -15,6 +15,7 @@ import urllib, urllib2, urlparse import lib.feedparser as feedparser +import lib.whatapi as whatapi from xml.dom import minidom from xml.parsers.expat import ExpatError from StringIO import StringIO @@ -808,69 +809,70 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False): if headphones.WHATCD: provider = "What.cd" - providerurl = url_fix("https://www.what.cd/browse.php") bitrate = None if headphones.PREFERRED_QUALITY == 3 or losslessOnly: - format = "FLAC" - bitrate = "(Lossless)" + format_regex = "FLAC" maxsize = 10000000000 elif headphones.PREFERRED_QUALITY: - format = "FLAC OR MP3" + format_regex = "(FLAC|MP3)" maxsize = 10000000000 else: - format = "MP3" + format_regex = "MP3" maxsize = 300000000 - query_items = ['artist:"%s"' % artistterm, - 'album:"%s"' % albumterm, - 'format:(%s)' % format, - 'size:[0 TO %d]' % 
maxsize, - '-seeders:0'] # cut out dead torrents - if bitrate: - query_items.append('bitrate:"%s"' % bitrate) - - params = { - "uid": headphones.WHATCD_UID, - "passkey": headphones.WHATCD_PASSKEY, - "rss": "1", - "c0": "1", - "s": "seeders", # sort by - "d": "desc" # direction - } - - searchURL = "%s?%s&q=%s" % (providerurl, urllib.urlencode(params), urllib.quote(" ".join(query_items))) - try: - data = urllib2.urlopen(searchURL, timeout=20).read() - except urllib2.URLError, e: - logger.warn('Error fetching data from %s: %s' % (provider, e)) - data = False + whatcd = whatapi.getWhatcdNetwork(headphones.WHATCD_USERNAME, headphones.WHATCD_PASSWORD) + except: + whatcd = None + logger.warn("What.cd credentials incorrect or site is down.") - if data: + if whatcd: + whatcd.enableCaching() - d = feedparser.parse(data) - if not len(d.entries): - logger.info(u"No results found from %s for %s" % (provider, term)) - pass + artist = whatcd.getArtist(artistterm) + artist_id = artist.getArtistId() + else: + artist_id = None - else: - for item in d.entries: - try: - title_match = re.search(r"(.+)\[(.+)\]$", item.title) - title = title_match.group(1).strip() - details = title_match.group(2).split("-") + if artist_id: # will be None if artist not found + logger.info(u"What.cd artist ID: %s" % artist_id) + artist_releases = artist.getArtistReleases() + logger.info(u"Found %d releases on what.cd for %s" % (len(artist_releases), artistterm)) + #Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id} + else: + artist_releases = [] - desc_match = re.search(r"Size: (\d+)<", item.description) - size = desc_match.group(1) + possible_matches = [ release for release in artist_releases if albumterm in release['name'] ] - url = item.link + # cap at 10 matches, 1 per second to reduce hits on API...don't wanna get in trouble. + # Might want to turn up number of matches later. 
+ max_torrent_info_reads = 10 + info_read_rate = 1 - resultlist.append((title, size, url, provider)) - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - except Exception, e: - logger.error(u"An error occurred while trying to parse the response from What.cd: %s" % e) + match_torrents = [] + for i, release in enumerate(possible_matches[:max_torrent_info_reads]): + if i > 0: + time.sleep(info_read_rate) + match_torrents.append(whatcd.getTorrent(release['id'])) + # filter on format, size, and num seeders + match_torrents = [ torrent for torrent in match_torrents + if re.search(format_regex, torrent.getTorrentDetails(), flags=re.I) + and torrent.getTorrentSize() <= maxsize + and torrent.getTorrentSeeders() >= minimumseeders ] + + # sort by times d/l'd + if not len(possible_matches): + logger.info(u"No results found from %s for %s after filtering" % (provider, term)) + elif len(match_torrents) > 1: + match_torrents.sort(match_torrents, key=whatapi.Torrent.getTorrentSeeders) + + for torrent in match_torrents: + resultlist.append((torrent.getTorrentFolderName(), + torrent.getTorrentSize(), + torrent.getTorrentDownloadURL(), + provider)) if headphones.ISOHUNT: provider = "isoHunt" diff --git a/headphones/webserve.py b/headphones/webserve.py index 5a5cc31f..6e225166 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -541,21 +541,21 @@ class WebInterface(object): return serve_template(templatename="config.html", title="Settings", config=config) config.exposed = True - - + + def configUpdate(self, http_host='0.0.0.0', http_username=None, http_port=8181, http_password=None, launch_browser=0, api_enabled=0, api_key=None, download_scan_interval=None, nzb_search_interval=None, libraryscan_interval=None, sab_host=None, sab_username=None, sab_apikey=None, sab_password=None, sab_category=None, download_dir=None, blackhole=0, blackhole_dir=None, usenet_retention=None, nzbmatrix=0, nzbmatrix_username=None, nzbmatrix_apikey=None, newznab=0, 
newznab_host=None, newznab_apikey=None, newznab_enabled=0, nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, newzbin=0, newzbin_uid=None, newzbin_password=None, preferred_quality=0, preferred_bitrate=None, detect_bitrate=0, move_files=0, torrentblackhole_dir=None, download_torrent_dir=None, - numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, rename_files=0, correct_metadata=0, - cleanup_files=0, add_album_art=0, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None, folder_format=None, file_format=None, - include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, remix=0, spokenword=0, audiobook=0, autowant_upcoming=False, autowant_all=False, - interface=None, log_dir=None, music_encoder=0, encoder=None, bitrate=None, samplingfrequency=None, encoderfolder=None, advancedencoder=None, - encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0, delete_lossless_files=0, prowl_enabled=0, prowl_onsnatch=0, - prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None, xbmc_update=0, xbmc_notify=0, nma_enabled=False, - nma_apikey=None, nma_priority=0, nma_onsnatch=0, synoindex_enabled=False, mirror=None, customhost=None, customport=None, customsleep=None, hpuser=None, hppass=None, - preferred_bitrate_high_buffer=None, preferred_bitrate_low_buffer=None, **kwargs): + numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, whatcd=0, whatcd_uid=None, whatcd_passkey=None, + rename_files=0, correct_metadata=0, cleanup_files=0, add_album_art=0, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None, + folder_format=None, file_format=None, include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, remix=0, spokenword=0, audiobook=0, + autowant_upcoming=False, autowant_all=False, interface=None, 
log_dir=None, music_encoder=0, encoder=None, bitrate=None, samplingfrequency=None, + encoderfolder=None, advancedencoder=None, encoderoutputformat=None, encodervbrcbr=None, encoderquality=None, encoderlossless=0, delete_lossless_files=0, + prowl_enabled=0, prowl_onsnatch=0, prowl_keys=None, prowl_priority=0, xbmc_enabled=0, xbmc_host=None, xbmc_username=None, xbmc_password=None, xbmc_update=0, + xbmc_notify=0, nma_enabled=False, nma_apikey=None, nma_priority=0, nma_onsnatch=0, synoindex_enabled=False, mirror=None, customhost=None, customport=None, + customsleep=None, hpuser=None, hppass=None, preferred_bitrate_high_buffer=None, preferred_bitrate_low_buffer=None, **kwargs): headphones.HTTP_HOST = http_host headphones.HTTP_PORT = http_port diff --git a/lib/whatapi.py b/lib/whatapi.py new file mode 100755 index 00000000..bc6f4394 --- /dev/null +++ b/lib/whatapi.py @@ -0,0 +1,1442 @@ +# -*- coding: utf_8 -*- +################################################################################# +# +# Name: whatapi.py +# +# Synopsis: Module to manage what.cd as a web service +# +# Description: See below list of the implemented webservices +# +# Copyright 2010 devilcius +# +# The Wide Open License (WOL) +# +# Permission to use, copy, modify, distribute and sell this software and its +# documentation for any purpose is hereby granted without fee, provided that +# the above copyright notice and this license appear in all source copies. +# THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF +# ANY KIND. See http://www.dspguru.com/wide-open-license for more information. 
+# +################################################################################# + + +__author__="devilcius" +__date__ ="$Oct 23, 2010 11:21:12 PM$" + + +import hashlib +try: + from BeautifulSoup import BeautifulSoup +except: + raise ImportError,"Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download" +import httplib +import os +import pickle +import re +import urllib +import shelve +import tempfile +from htmlentitydefs import name2codepoint as n2cp + + +""" +A list of the implemented webservices (from what.cd) +===================================== + +# User + + * user.getUserId + * user.getInfo + + * user.getTorrentsSeeding + * user.getTorrentsSnatched + * user.getTorrentsUploaded + * user.getTorrentsCommented + + * user.specificUserInfo + Attributes: + ######## stats ########### + -joindate + -lastseen + -dataup + -datadown + -ratio + -rratio + ######## percentile ########### + -uppercentile + -downpercentile + -torrentsuppercentile + -reqfilledpercentile + -bountyspentpercentile + -postsmadepercentile + -artistsaddedpercentile + -overallpercentile + ######## community ########### + -postsmade + -torrentscomments + -collagesstarted + -collagescontr + -reqfilled + -reqvoted + -uploaded + -unique + -perfect + -seeding + -leeching + -snatched + -invited + -artistsadded + + +# Artist + + * artist.getArtistReleases + * artist.getArtistImage + * artist.getArtistInfo + * artist.getArtistTags + * artist.getArtistSimilar + * artist.getArtistRequests + + + artist.setArtistInfo + + +# Torrent + + * torrent.getTorrentParentId + * torrent.getTorrentDownloadURL + * torrent.getTorrentDetails + * torrent.getTorrentSize + * torrent.getTorrentSnatched + * torrent.getTorrentSeeders + * torrent.getTorrentLeechers + * torrent.getTorrentUploadedBy + * torrent.getTorrentFolderName + * torrent.getTorrentFileList + * torrent.getTorrentDescription + * torrent.getTorrentComments + * torrent.isTorrentFreeLeech + * torrent.isTorrentReported 
+ + +# Authenticate + + * authenticate.getAuthenticatedUserId + * authenticate.getAuthenticatedUserAuthCode + * authenticate.getAuthenticatedUserDownload + * authenticate.getAuthenticatedUserUpload() + * authenticate.getAuthenticatedUserRatio + * authenticate.getAuthenticatedUserRequiredRatio + +""" + +class ResponseBody: + """A Response Body Object""" + pass + +class SpecificInformation: + """A Specific Information Object""" + pass + + +class WhatBase(object): + """An abstract webservices object.""" + whatcd = None + + def __init__(self, whatcd): + self.whatcd = whatcd + #if we are not autenticated in what.cd, do it now + if not self.whatcd.isAuthenticated(): + print "authenticating..." + self.whatcd.headers = Authenticate(self.whatcd).getAuthenticatedHeader() + + def _request(self,type, path, data, headers): + return Request(self.whatcd,type,path,data,headers) + + def _parser(self): + return Parser(self.whatcd) + + def utils(self): + return Utils() + + +class Utils(): + + def md5(self, text): + """Returns the md5 hash of a string.""" + + h = hashlib.md5() + h.update(self._string(text)) + + return h.hexdigest() + + def _unicode(self, text): + if type(text) == unicode: + return text + + if type(text) == int: + return unicode(text) + + return unicode(text, "utf-8") + + def _string(self, text): + if type(text) == str: + return text + + if type(text) == int: + return str(text) + + return text.encode("utf-8") + + def _number(self,string): + """ + Extracts an int from a string. 
Returns a 0 if None or an empty string was passed + """ + + if not string: + return 0 + elif string == "": + return 0 + else: + try: + return int(string) + except ValueError: + return float(string) + + def substituteEntity(self, match): + ent = match.group(2) + if match.group(1) == "#": + return unichr(int(ent)) + else: + cp = n2cp.get(ent) + + if cp: + return unichr(cp) + else: + return match.group() + + def decodeHTMLEntities(self, string): + entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") + return entity_re.subn(self.substituteEntity, string)[0] + + + +class WhatCD(object): + + def __init__(self, username, password, site, loginpage, headers): + + #credentials + self.username = username + self.password = password + self.site = site + self.loginpage = loginpage + self.headers = headers + self.authenticateduserinfo = {} + + self.cache_backend = None + self.proxy_enabled = False + self.proxy = None + + def isAuthenticated(self): + """ + Checks if we are authenticated in what.cd + """ + if "id" in self.authenticateduserinfo: + return True + else: + return False + + def getCredentials(self): + """ + Returns an authenticated user credentials object + """ + return Authenticate(self) + + + def getUser(self, username): + """ + Returns an user object + """ + return User(username, self) + + def getTorrent(self, id, page=1): + """ + Returns a torrent object + """ + return Torrent(id, page, None, self) + + def getTorrentGroup(self, id, page=1): + """ + Returns a torrent object + """ + return Torrent(id, page, True, self) + + def getArtist(self, name): + """ + Returns an artist object + """ + return Artist(name, self) + + def enableProxy(self, host, port): + """Enable a default web proxy""" + self.proxy = [host, Utils()._number(port)] + self.proxy_enabled = True + + def disableProxy(self): + """Disable using the web proxy""" + self.proxy_enabled = False + + def isProxyEnabled(self): + """Returns True if a web proxy is enabled.""" + return self.proxy_enabled + + def 
getProxy(self): + """Returns proxy details.""" + return self.proxy + + def enableCaching(self, file_path = None): + """Enables caching request-wide for all cachable calls. + * file_path: A file path for the backend storage file. If + None set, a temp file would probably be created, according the backend. + """ + if not file_path: + file_path = tempfile.mktemp(prefix="whatapi_tmp_") + + self.cache_backend = _ShelfCacheBackend(file_path) + + def disableCaching(self): + """Disables all caching features.""" + self.cache_backend = None + + def isCachingEnabled(self): + """Returns True if caching is enabled.""" + + return not (self.cache_backend == None) + + def getCacheBackend(self): + + return self.cache_backend + +def getWhatcdNetwork(username = "", password = ""): + """ + Returns a preconfigured WhatCD object for what.cd + # Parameters: + * username str: a username of a valid what.cd user + * password str: user's password + """ + + return WhatCD ( + username = username, + password = password, + site = "ssl.what.cd", + loginpage = "/login.php", + headers = { + "Content-type": "application/x-www-form-urlencoded", + 'Accept-Charset': 'utf-8', + 'User-Agent': "whatapi [devilcius]" + }) + + + +class _ShelfCacheBackend(object): + """Used as a backend for caching cacheable requests.""" + def __init__(self, file_path = None): + self.shelf = shelve.open(file_path) + + def getHTML(self, key): + return self.shelf[key] + + def setHTML(self, key, xml_string): + self.shelf[key] = xml_string + + def hasKey(self, key): + return key in self.shelf.keys() + + +class Request(object): + """web service operation.""" + + def __init__(self, whatcd,type, path, data, headers): + + self.whatcd = whatcd + self.utils = Utils() + self.type = type + self.path = path + self.data = data + self.headers = headers + #enable catching? 
+ if whatcd.isCachingEnabled(): + self.cache = whatcd.getCacheBackend() + + def getCacheKey(self): + """The cache key is a md5 hash of request params.""" + + key = self.type + self.path + self.data + return Utils().md5(key) + + def getCachedResponse(self): + """Returns a file object of the cached response.""" + + if not self.isCached(): + response = self.downloadResponse() + self.cache.setHTML(self.getCacheKey(), response) + return self.cache.getHTML(self.getCacheKey()) + + def isCached(self): + """Returns True if the request is already in cache.""" + + return self.cache.hasKey(self.getCacheKey()) + + def downloadResponse(self): + """Returns a ResponseBody object from the server.""" + + #print "downloading from %s" % (self.path) + conn = httplib.HTTPSConnection(self.whatcd.site) + rb = ResponseBody() + + if self.whatcd.isProxyEnabled(): + conn = httplib.HTTPSConnection(host = self.whatcd.getProxy()[0], port = self.whatcd.getProxy()[1]) + conn.request(method = self.type, url="https://" + self.whatcd.site + self.path, body = self.data, headers = self.headers) + else: + conn.request(self.type, self.path, self.data, self.headers) + + response = conn.getresponse() + rb.headers = response.getheaders() + # Rip all inline JavaScript out of the response in case it hasn't been properly escaped + rb.body = re.sub('', '', response.read()) + conn.close() + return rb + + def execute(self, cacheable = False): + """Depending if caching is enabled, returns response from the server or, if available, the cached response""" + if self.whatcd.isCachingEnabled() and cacheable: + response = self.getCachedResponse() + else: + response = self.downloadResponse() + + return response + +class Authenticate(WhatBase): + + def __init__(self, whatcd): + """Create an authenticated user object. + # Parameters: + * whatcd object: WhatCD object. 
+ """ + self.whatcd = whatcd + self.parser = Parser(whatcd) + if not self.whatcd.isAuthenticated(): + self.getAuthenticatedHeader() + + def setCookie(self): + print "creating cookie" + f = open('cookie', 'w') + loginform= {'username': self.whatcd.username, 'password': self.whatcd.password \ + , 'keeplogged': '1', 'login': 'Login'} + data = urllib.urlencode(loginform) + response = self._request("POST", self.whatcd.loginpage, data, self.whatcd.headers).execute(True) + try: + cookie=dict(response.headers)['set-cookie'] + session=re.search("session=[^;]+", cookie).group(0) + self.whatcd.headers["Cookie"] = session + homepage = response.body + pickle.dump(self.whatcd.headers, f) + except (KeyError, AttributeError): + os.remove('cookie') + self.whatcd.headers = None +# quit() # Commented out...can't have this quitting headphones on us + raise Exception("Login failed, most likely bad creds or the site is down, nothing to do") + finally: + f.close() + + + def getAuthenticatedHeader(self): + """ + Log user in what.cd and returns the authenticated header + """ + homepage = None + if os.path.exists("cookie"): + f = open("cookie", "r") + try: + self.whatcd.headers = pickle.load(f) + except EOFError: + os.remove("cookie") + print "invalid cookie, removed" + self.setCookie() + else: + self.setCookie() + #set authenticated user info + if 'id' not in self.whatcd.authenticateduserinfo: + self.whatcd.authenticateduserinfo = self.getAuthenticatedUserInfo(homepage) + + return self.whatcd.headers + + def getAuthenticatedUserInfo(self, homepage = None): + """ + Returns authenticated user's info + """ + if not homepage: + homepage = BeautifulSoup(self._request("GET", "/index.php", "", self.whatcd.headers).execute(True).body) + authuserinfo = self._parser().authenticatedUserInfo(homepage.find("div", {"id": "userinfo"})) + return authuserinfo + + def getAuthenticatedUserId(self): + """ + Returns authenticated user's id + """ + return self.whatcd.authenticateduserinfo["id"] + + def 
getAuthenticatedUserAuthCode(self): + """ + Returns authenticated user's authcode + """ + return self.whatcd.authenticateduserinfo["authcode"] + + + def getAuthenticatedUserUpload(self): + """ + Returns authenticated user's total uploaded data + """ + return self.whatcd.authenticateduserinfo["uploaded"] + + + def getAuthenticatedUserDownload(self): + """ + Returns authenticated user's total downloaded data + """ + return self.whatcd.authenticateduserinfo["downloaded"] + + + def getAuthenticatedUserRatio(self): + """ + Returns authenticated user's ratio + """ + return self.whatcd.authenticateduserinfo["ratio"] + + def getAuthenticatedUserRequiredRatio(self): + """ + Returns authenticated user's required ratio + """ + return self.whatcd.authenticateduserinfo["required"] + + +class User(WhatBase): + """A What.CD user""" + + def __init__(self, username, whatcd): + """Create an user object. + # Parameters: + * username str: The user's name. + - whatcd object: the what.cd network object + """ + WhatBase.__init__(self, whatcd) + self.name = username + self.whatcd = whatcd + self.userpage = "/user.php?" 
+ self.userid = None + self.userinfo = None + + def getUserName(self): + """ + Returns user's name + """ + return self.username + + def getUserId(self): + """ + Returns user's id, None if user doesn't exists + """ + if self.userid: + return self.userid + else: + idform = {'action': "search", 'search': self.name} + data = urllib.urlencode(idform) + headers = self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).headers + if dict(headers) is None: + return None + else: + self.userid = dict(headers)['location'][12:] + return self.userid + + def getInfo(self): + """ + Returns a dictionary of {percentile:{dataup str, + datadown str, + overall str, + postmade str, + boutyspent str, + reqfilled str, + artistsadded str, + torrentsup str}, + stats: {uploaded str, + ratio str, + joined str, + downloaded str, + lastseen str, + rratio str}, + community: {uploaded tuple(total str, url str), + forumposts tuple(total str, url str), + invited tuple (total,None), + perfectflacs tuple(total str, url str), + contributedcollages tuple(total str, url str), + reqvoted tuple(total str, url str), + uniquegroups tuple(total str, url str) + torrentscomments tuple(total str, url str), + snatched tuple(total str, url str), + artists str, + reqfilled tuple(total str, url str), + startedcollages tuple(total str, url str), + leeching tuple(total str, url str), + seeding tuple(total str, url str)} + } + If paranoia is not Off, it returns None. 
+ """ + if self.getUserId(): + form = {'id': self.getUserId()} + data = urllib.urlencode(form) + userpage = BeautifulSoup(self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).body) + info = self._parser().userInfo(userpage.find("div", {"class": "sidebar"}), self.name) + self.userinfo = info + return info + else: + print "no user id retrieved" + return None + + + def getTorrentsSeeding(self, page=1): + """ + Returns a list with all user's uploaded music torrents + in form of dictionary {page(tuple with current and total),tag, dlurl, id, + artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, + album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} + """ + if self.userid is None: + self.userid = self.getUserId() + url = "/torrents.php?type=seeding&userid=%s&page=%d" % (self.userid,page) + torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) + return self._parser().torrentsList(torrentspage) + + def getTorrentsSnatched(self,page=1): + """ + Returns a list with all user's uploaded music torrents + in form of dictionary {page(tuple with current and total),tag, dlurl, id, + artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, + album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} + """ + if self.userid is None: + self.userid = self.getUserId() + url = "/torrents.php?type=snatched&userid=%s&page=%d" % (self.userid,page) + torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) + return self._parser().torrentsList(torrentspage) + + def getTorrentsUploaded(self, page=1): + """ + Returns a list with all user's uploaded music torrents + in form of dictionary {page(tuple with current and total),tag, dlurl, id, + artist(a tuple with 1 artist 
name || 2 names in case of two artists || 'Various Artists' if V.A.}, + album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} + """ + if self.userid is None: + self.userid = self.getUserId() + url = "/torrents.php?type=uploaded&userid=%s&page=%d" % (self.userid,page) + torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) + return self._parser().torrentsList(torrentspage) + + + def getTorrentsCommented(self, page=1): + """ + Returns a list with all user's commented torrents + in form of dictionary {postid, torrentid, comment,postdate, pages} + + """ + if self.userid is None: + self.userid = self.getUserId() + + url = "/%s&page=%d" % (self.specificUserInfo().torrentscomments[1],page) + torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) + return self._parser().postsList(torrentspage) + + + + ############################################### + # specific values # + ############################################### + + + def specificUserInfo(self): + """ + Returns specific attributes of user info. 
None if user's paranoia is on + """ + info = SpecificInformation() + # Initialize attributes + info.joindate, info.lastseen, info.dataup, info.datadown,\ + info.ratio, info.rratio,info.uppercentile,info.downpercentile, \ + info.torrentsuppercentile,info.reqfilledpercentile,info.bountyspentpercentile, \ + info.postsmadepercentile,info.artistsaddedpercentile,info.overallpercentile, \ + info.postsmadecom,info.torrentscommentscom,info.collagesstartedcom,info.collagescontrcon, \ + info.reqfilledcom,info.reqvotedcom,info.uploadedcom,info.uniquecom, info.perfectcom, \ + info.seedingcom, info.leechingcom,info.snatchedcom,info.invitedcom,info.artistsaddedcom \ + = (None,None, None, None,None,None,None,None,None,None,None,None,None, None,\ + None,None,None,None,None,None,None,None,None,None,None,None,None,None) + + + if not self.userinfo and self.getInfo() is None: + pass + else: + ######## stats ########### + info.joindate = self.userinfo['stats']['joined'] + info.lastseen = self.userinfo['stats']['lastseen'] + info.dataup = self.userinfo['stats']['uploaded'] + info.datadown = self.userinfo['stats']['downloaded'] + info.ratio = self.userinfo['stats']['ratio'] + info.rratio = self.userinfo['stats']['rratio'] + ######## percentile ########### + info.uppercentile = self.userinfo['percentile']['dataup'] + info.downpercentile = self.userinfo['percentile']['datadown'] + info.torrentsuppercentile = self.userinfo['percentile']['torrentsup'] + info.reqfilledpercentile = self.userinfo['percentile']['reqfilled'] + info.bountyspentpercentile = self.userinfo['percentile']['bountyspent'] + info.postsmadepercentile = self.userinfo['percentile']['postsmade'] + info.artistsaddedpercentile = self.userinfo['percentile']['artistsadded'] + info.overallpercentile = self.userinfo['percentile']['overall'] + ######## community ########### + info.postsmadecom = self.userinfo['community']['forumposts'] + info.torrentscomments = self.userinfo['community']['torrentscomments'] + info.collagesstartedcom 
class Torrent(WhatBase):
    """A What.CD torrent.

    The torrent page is requested and parsed once, in the constructor. Every
    ``getTorrent*`` / ``isTorrent*`` accessor reads from that cached result
    and returns None when no torrent information could be retrieved.
    """

    def __init__(self, id, page, isparent, whatcd):
        """Create a torrent object.

        # Parameters:
            * id str: The torrent's id (a group id when ``isparent`` is true).
            * page: The torrent page's number [optional]
            * isparent: True when ``id`` identifies a torrent group rather
              than a single torrent.
            * whatcd object: the WhatCD network object
        """
        WhatBase.__init__(self, whatcd)
        self.id = id
        self.page = page
        self.whatcd = whatcd
        self.isParent = isparent
        self.torrentpage = "/torrents.php?"
        # Fetches and parses the page right away (network access).
        self.torrentinfo = self.getInfo()

    def getTorrentUrl(self):
        """
        Returns the torrent's real URL.

        For a group (parent) id the URL is built locally. For a single
        torrent id the site is queried and the ``location`` response header
        is returned, or None when the response carries no such header.
        """
        if self.isParent:
            form = {'id': self.id, 'page': self.page}
            return self.torrentpage + urllib.urlencode(form)

        form = {'torrentid': self.id, 'page': self.page}
        data = urllib.urlencode(form)
        headers = self._request("GET", self.torrentpage + data, "", self.whatcd.headers).execute(True).headers
        # dict() never yields None, so the missing-header case has to be
        # detected on the key itself instead of on the dict.
        return dict(headers).get('location')

    def getInfo(self):
        """
        Returns a dictionary with the torrent's info, None if the torrent
        page could not be retrieved.
        """
        url = self.getTorrentUrl()
        if url is None:
            print("no torrent retrieved with such id")
            return None

        torrentpage = BeautifulSoup(self._request("GET", "/" + url, "", self.whatcd.headers).execute(True).body)

        # An unknown id ends up on a page whose title contains 'Site log'.
        if 'Site log' in torrentpage.find("title").string:
            print("no torrent retrieved with such id")
            return None
        return self._parser().torrentInfo(torrentpage, self.id, self.isParent)

    def _torrentField(self, key):
        """
        Returns ``torrentinfo['torrent'][key]``, None if no info was retrieved.
        """
        if self.torrentinfo:
            return self.torrentinfo['torrent'][key]
        return None

    def getTorrentParentId(self):
        """
        Returns torrent's group id
        """
        return self._torrentField('parentid')

    def getTorrentDownloadURL(self):
        """
        Returns relative url to download the torrent
        """
        return self._torrentField('downloadurl')

    def getTorrentDetails(self):
        """
        Returns torrent's details (format / bitrate)
        """
        return self._torrentField('details')

    def getTorrentEditionInfo(self):
        """
        Returns torrent's edition info (Edition information / media type)
        """
        return self._torrentField('editioninfo')

    def getTorrentMediaType(self):
        """
        Returns torrent's media type
        """
        return self._torrentField('rlsmedia')

    def getTorrentSize(self):
        """
        Returns torrent's size
        """
        return self._torrentField('size')

    def getTorrentSnatched(self):
        """
        Returns torrent's total snatches
        """
        return self._torrentField('snatched')

    def getTorrentSeeders(self):
        """
        Returns torrent's current seeders
        """
        return self._torrentField('seeders')

    def getTorrentLeechers(self):
        """
        Returns torrent's current leechers
        """
        return self._torrentField('leechers')

    def getTorrentUploadedBy(self):
        """
        Returns torrent's uploader
        """
        return self._torrentField('uploadedby')

    def getTorrentFolderName(self):
        """
        Returns torrent's folder name
        """
        return self._torrentField('foldername')

    def getTorrentFileList(self):
        """
        Returns torrent's file list
        """
        return self._torrentField('filelist')

    def getTorrentReleaseType(self):
        """
        Returns torrent's release type
        """
        return self._torrentField('rlstype')

    def getTorrentDescription(self):
        """
        Returns torrent's description / empty string if there's none
        """
        return self._torrentField('torrentdescription')

    def getTorrentComments(self):
        """
        Returns a list of dictionaries with each comment in the torrent page
        {postid,post,userid,username}
        """
        return self._torrentField('comments')

    def getTorrentCommentsPagesNumber(self):
        """
        Returns number of pages of comments in the torrent
        """
        # The attribute is 'torrentinfo'; the former 'torrentInfo' spelling
        # raised AttributeError on every call.
        return self._torrentField('commentspages')

    def isTorrentFreeLeech(self):
        """
        Returns True if torrent is freeleech, False if not
        """
        return self._torrentField('isfreeleech')

    def isTorrentReported(self):
        """
        Returns True if torrent is reported, False if not
        """
        return self._torrentField('isreported')
class Artist(WhatBase):
    """A What.CD artist.

    The artist page is requested and parsed once, in the constructor. The
    ``getArtist*`` accessors read from that cached result and return None
    when the artist could not be found.
    """

    def __init__(self, name, whatcd):
        """Create an artist object.

        # Parameters:
            * name str: The artist's name.
            * whatcd object: The WhatCD network object
        """
        WhatBase.__init__(self, whatcd)
        self.name = name
        self.whatcd = whatcd
        self.artistpage = "/artist.php"
        self.utils = Utils()
        # Fetches and parses the artist page right away (network access).
        self.info = self.getInfo()

    def getArtistName(self):
        """
        Returns artist's name
        """
        return self.name

    def getArtistId(self):
        """
        Returns artist's id, None if artist's not found
        """
        form = {'artistname': self.name}
        data = urllib.urlencode(form)
        headers = self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).headers
        # A missing 'location' header is treated like an unknown artist
        # instead of raising KeyError.
        location = dict(headers).get('location') or ''
        prefix = 'artist.php?id='
        if location[0:len(prefix)] != prefix:
            return None
        return location[len(prefix):]

    def getInfo(self):
        """
        Returns artist's info, None if there isn't
        """
        # Resolve the id once: every getArtistId() call is an HTTP request.
        artistid = self.getArtistId()
        if not artistid:
            print("no artist info retrieved")
            return None

        data = urllib.urlencode({'id': artistid})
        artistpage = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body)
        return self._parser().artistInfo(artistpage)

    def _infoField(self, key):
        """
        Returns ``info[key]``, None if no artist info was retrieved.
        """
        if self.info:
            return self.info[key]
        return None

    def getArtistReleases(self):
        """
        Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id}
        """
        return self._infoField('releases')

    def getArtistImage(self):
        """
        Return the artist image URL, None if there's no image
        """
        return self._infoField('image')

    def getArtistInfo(self):
        """
        Return the artist's info, blank string if none
        """
        return self._infoField('info')

    def getArtistTags(self):
        """
        Return a list with artist's tags
        """
        return self._infoField('tags')

    def getArtistSimilar(self):
        """
        Return a list with artist's similar artists
        """
        return self._infoField('similarartists')

    def getArtistRequests(self):
        """
        Returns a list with all artist's requests in form of dictionary {requestname, id}
        """
        return self._infoField('requests')

    def setArtistInfo(self, id, info):
        """
        Updates what.cd artist's info and image
        Returns 1 if artist info updated succesfully, 0 if not.

        # Parameters:
            * id str: what.cd artist's id
            * info tuple: (The artist's info -str-, image url -str- (None if there isn't))
        """
        if not info[0]:
            # Report the problem but keep the documented 1/0 contract; the
            # message string that used to be returned here is truthy and
            # read as success to callers testing the result.
            print('no artist info provided. Aborting.')
            return 0

        params = {'action': 'edit', 'artistid': id}
        data = urllib.urlencode(params)

        edit_page = BeautifulSoup(self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).body)
        what_form = self._parser().whatForm(edit_page, 'edit')

        # Keep the image already stored on the site when none is supplied.
        if info[1]:
            image_to_post = info[1]
        else:
            image_to_post = what_form['image']

        data_to_post = {'body': info[0].encode('utf-8'),
                        'summary': 'automated artist info insertion',
                        'image': image_to_post,
                        'artistid': what_form['artistid'],
                        'auth': what_form['auth'],
                        'action': what_form['action']}

        #post artist's info
        # NOTE(review): this mutates the headers object shared through
        # self.whatcd, so the Content-type stays set for later requests --
        # confirm that is intended before changing it.
        self.whatcd.headers['Content-type'] = "application/x-www-form-urlencoded"
        response = self._request("POST", self.artistpage, urllib.urlencode(data_to_post), self.whatcd.headers).execute(False)

        # No redirect header means the edit cannot be confirmed.
        location = dict(response.headers).get('location')
        if location is None:
            return 0
        artist_id_returned = location[14:]

        if str(artist_id_returned) == str(what_form['artistid']):
            return 1
        return 0
class Parser(object):
    """Screen-scraping helpers that turn what.cd HTML pages into dictionaries.

    Every parser relies on fixed element positions and fixed string offsets
    of the site's markup, so the index and slice constants below must be
    kept exactly in step with the page layout.
    """

    def __init__(self, whatcd):
        self.utils = Utils()
        self.whatcd = whatcd
        # Last page count seen by torrentsList()/postsList().
        self.totalpages = 0

    def authenticatedUserInfo(self, dom):
        """
        Parse the index page and returns a dictionnary with basic authenticated user information
        """
        userInfo = {}
        soup = BeautifulSoup(str(dom))
        for ul in soup.fetch('ul'):
            # .get() so that lists without an id attribute are skipped
            # instead of raising KeyError.
            ulid = ul.get("id")
            if ulid == "userinfo_username":
                items = ul.findAll('li')

                #retrieve user logged id
                hrefid = items[0].find("a")["href"]
                idmatch = re.compile('[0-9]+').search(hrefid)
                if idmatch is None:
                    print("not found href to retrieve user id")
                else:
                    userInfo["id"] = idmatch.group(0)

                #retrieve user auth code
                hrefauth = items[2].find("a")["href"]
                authmatch = re.compile('=[0-9a-fA-F]+').search(hrefauth)
                if authmatch is None:
                    print("not found href to retrieve user auth code")
                else:
                    # drop the leading '='
                    userInfo["authcode"] = authmatch.group(0)[1:]

            elif ulid == "userinfo_stats":
                items = ul.findAll('li')
                if len(items) > 0:
                    userInfo["uploaded"] = items[0].find("span").string
                    userInfo["downloaded"] = items[1].find("span").string
                    userInfo["ratio"] = items[2].findAll("span")[1].string
                    userInfo["required"] = items[3].find("span").string
                    userInfo["authenticate"] = True

        return userInfo

    def userInfo(self, dom, user):
        """
        Parse an user's page and returns a dictionnary with its information,
        None when the user's paranoia setting is not 'Off'.

        # Parameters:
            * dom str: user page html
            * user str: what.cd username
        """
        userInfo = {'stats': {}, 'percentile': {}, 'community': {}}
        soup = BeautifulSoup(str(dom))

        for div in soup.fetch('div', {'class': 'box'}):
            #if paronoia is not set to 'Off', stop collecting data
            if div.findAll('div')[0].string == "Personal":
                if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off":
                    return None

        boxes = soup.findAll('div', {'class': 'box'})
        stats = boxes[1].findAll('li')
        percentile = boxes[2].findAll('li')
        community = boxes[4].findAll('li')

        # The slice offsets below skip the fixed text that precedes each
        # value inside its <li>.
        userInfo['stats']['joined'] = stats[0].find('span')['title']
        userInfo['stats']['lastseen'] = stats[1].find('span')['title']
        userInfo['stats']['uploaded'] = stats[2].string[10:]
        userInfo['stats']['downloaded'] = stats[3].string[12:]
        userInfo['stats']['ratio'] = stats[4].find('span').string
        userInfo['stats']['rratio'] = stats[5].string[16:]

        # (key, <li> index, number of leading characters to drop)
        percentilefields = (('dataup', 0, 15),
                            ('datadown', 1, 17),
                            ('torrentsup', 2, 19),
                            ('reqfilled', 3, 17),
                            ('bountyspent', 4, 14),
                            ('postsmade', 5, 12),
                            ('artistsadded', 6, 15))
        for key, index, start in percentilefields:
            userInfo['percentile'][key] = percentile[index].string[start:]
        userInfo['percentile']['overall'] = percentile[7].find('strong').string[14:]

        # (key, <li> index, leading characters to drop, trailing characters
        # to drop); each value is a (text, link href) tuple.
        #NB: there's a carriage return and white spaces inside the snatched li tag
        communityfields = (('forumposts', 0, 13, 2),
                           ('torrentscomments', 1, 18, 2),
                           ('startedcollages', 2, 18, 2),
                           ('contributedcollages', 3, 25, 2),
                           ('reqfilled', 4, 17, 2),
                           ('reqvoted', 5, 16, 2),
                           ('uploaded', 6, 10, 2),
                           ('uniquegroups', 7, 15, 2),
                           ('pefectflacs', 8, 16, 2),
                           ('seeding', 9, 9, 2),
                           ('leeching', 10, 10, 2),
                           ('snatched', 11, 10, 7))
        for key, index, start, trailing in communityfields:
            text = community[index].contents[0].string
            userInfo['community'][key] = (text[start:len(text) - trailing],
                                          community[index].find('a')['href'])
        userInfo['community']['invited'] = (community[12].contents[0].string[9:], None)
        userInfo['community']['artists'] = percentile[6]['title']

        return userInfo

    def torrentInfo(self, dom, id, isparent):
        """
        Parse a torrent's page and returns a dictionnary with its information

        # Parameters:
            * dom: the parsed torrent page
            * id str: the torrent's id (the group id when isparent is true)
            * isparent: True when the page is requested by group id; only
              'parentid', 'rlstype' and the comments are filled in then.
        """
        torrentInfo = {'torrent': {}}
        torrent = torrentInfo['torrent']
        torrentfiles = []
        torrentdescription = ""
        isreported = False
        isfreeleech = False
        soup = BeautifulSoup(str(dom))
        if isparent:
            torrent['parentid'] = id
        else:
            groupidurl = soup.findAll('div', {'class': 'linkbox'})[0].find('a')['href']
            torrent['editioninfo'] = soup.findAll('td', {'class': 'edition_info'})[0].find('strong').contents[-1]
            regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray')
            torrent['rlsmedia'] = regrlsmedia.search(torrent['editioninfo']).group(0)
            torrent['parentid'] = groupidurl[groupidurl.rfind("=") + 1:]

            row = soup.findAll('tr', {'id': 'torrent%s' % id})[0]
            torrent['downloadurl'] = row.findAll('a', {'title': 'Download'})[0]['href']

            ## is freeleech or/and reported? ##
            # The last link of the row holds the details text followed by
            # the optional 'Reported' / 'Freeleech!' markers.
            flags = row.findAll('a')[-1].contents
            if len(flags) == 4:
                #both
                isreported = True
                isfreeleech = True
            elif len(flags) == 2:
                #either
                marker = flags[1].string
                if marker == 'Reported':
                    isreported = True
                elif marker == 'Freeleech!':
                    # this branch used to set isreported by mistake
                    isfreeleech = True
            torrent['details'] = flags[0]
            torrent['isfreeleech'] = isfreeleech
            torrent['isreported'] = isreported

            cells = row.findAll('td')
            torrent['size'] = cells[1].string
            torrent['snatched'] = cells[2].string
            torrent['seeders'] = cells[3].string
            torrent['leechers'] = cells[4].string

            inforow = soup.findAll('tr', {'id': 'torrent_%s' % id})[0]
            torrent['uploadedby'] = inforow.findAll('a')[0].string

            filesbox = soup.findAll('div', {'id': 'files_%s' % id})[0]
            foldername = filesbox.findAll('div')[1].string
            if foldername is None:
                torrent['foldername'] = None
            else:
                torrent['foldername'] = self.utils.decodeHTMLEntities(foldername)
            # first and last rows are not files
            for file in filesbox.findAll('tr')[1:-1]:
                torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string))
            torrent['filelist'] = torrentfiles

            #is there any description?
            blockquotes = inforow.findAll('blockquote')
            if len(blockquotes) > 1:
                description = torrent['description'] = blockquotes[1].contents
                for content in description:
                    if content.string:
                        torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string))
            torrent['torrentdescription'] = torrentdescription

        regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown')
        torrent['rlstype'] = regrlstype.search(soup.find('div', {'class': 'thin'}).find('h2').contents[1]).group(0)

        torrent['comments'] = []
        torrent['commentspages'] = 0

        comments = soup.findAll('table', {'class': 'forum_post box vertical_margin'})
        if len(comments) > 0:
            linkbox = dom.findAll("div", {"class": "linkbox"})[-1]
            pages = 1
            # if there's more than 1 page of comments
            if linkbox.find("a"):
                # by default torrent page show last page of comments
                lastpage = linkbox.findAll("a")[-1]['href']
                # presumably offset 18 skips "torrents.php?page=" -- TODO confirm
                pages = int(lastpage[18:lastpage.find('&')]) + 1
            for comment in comments:
                postid = comment.find("a", {"class": "post_id"}).string[1:]
                userlink = comment.findAll("a")[1]
                userid = userlink['href'][12:]
                username = userlink.string
                postbody = comment.find("div", {"id": "content" + postid})
                post = u''.join([text.string for text in postbody.findAll(text=True)])
                torrent['comments'].append({"postid": postid, "post": post, "userid": userid, "username": username})

            torrent['commentspages'] = pages

        return torrentInfo

    def artistInfo(self, dom):
        """
        Parse an artist's page and returns a dictionnary with its information
        {releases, image, info, tags, similarartists, requests}
        """
        artistInfo = {}
        releases = []
        requests = []
        infoartist = ""
        tagsartist = []
        similarartists = []
        soup = BeautifulSoup(str(dom))
        soupfetch = soup.fetch('table', {'class': 'torrent_table'})
        if not soupfetch:
            soupfetch = soup.fetch('table', {'class': 'torrent_table grouped release_table'})
        for releasetype in soupfetch:
            releasetypenames = releasetype.findAll('strong')
            # the first <strong> of each table names the release type
            releasetypename = releasetypenames[0].string
            for release in releasetypenames[1:-1]:
                #skip release edition info and Freeleech! <strong>s
                if len(release.parent.contents) > 1 and len(release.contents) > 1:
                    releaseyear = release.contents[0][0:4]
                    releasename = release.contents[1].string
                    releasehref = release.contents[1]['href']
                    releaseid = releasehref[releasehref.rfind('=') + 1:]
                    releases.append({'releasetype': releasetypename,
                                     'year': releaseyear,
                                     'name': self.utils.decodeHTMLEntities(releasename),
                                     'id': releaseid})

        artistInfo['releases'] = releases

        #is there an artist image?
        artistInfo['image'] = None
        image = soup.find('div', {'class': 'box'}).find('img')
        if image:
            artistInfo['image'] = image['src']

        #is there any artist info?
        for content in soup.find('div', {'class': 'body'}).contents:
            if content.string:
                infoartist = "%s%s" % (infoartist, self.utils._string(content.string))
        artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist)

        statslists = soup.findAll('ul', {'class': 'stats nobullet'})

        #is there any artist tags?
        for li in statslists[0].findAll('li'):
            if li.contents[0].string:
                tagsartist.append(self.utils._string(li.contents[0].string))
        artistInfo['tags'] = tagsartist

        #is there any similar artist?
        for artist in statslists[2].findAll('span', {'title': '2'}):
            if artist.contents[0].string:
                similarartists.append(self.utils._string(artist.contents[0].string))
        artistInfo['similarartists'] = similarartists

        #is there any request?
        requeststable = soup.find('table', {'id': 'requests'})
        if requeststable:
            for request in requeststable.findAll('tr', {'class': re.compile('row')}):
                requestlink = request.findAll('a')[1]
                requests.append({'requestname': requestlink.string, 'id': requestlink['href'][28:]})

        artistInfo['requests'] = requests

        return artistInfo

    def torrentsList(self, dom):
        """
        Parse a torrent's list page and returns a list of dictionaries, one
        per music torrent:
        {tag, dlurl, id, artist, artistid, album, year, pages, scene}

        'pages' is the page number string taken from the navigation bar when
        a last-page link exists, otherwise an int.
        """
        torrentslist = []
        torrentssoup = dom.find("table", {"width": "100%"})
        pages = 0

        #if there's at least 1 torrent in the list
        if torrentssoup:
            navsoup = dom.find("div", {"class": "linkbox"})
            pages = 1
            regyear = re.compile(r'\[\d{4}\]')

            #is there a page navigation bar?
            if navsoup.contents:
                #if there's more than 1 page of torrents
                if navsoup.contents[-1].has_key('href'):
                    lastpage = navsoup.contents[-1]['href']
                    pages = lastpage[18:lastpage.find('&')]
                    self.totalpages = pages
                else: #we are at the last page, no href
                    # totalpages may hold the string stored above, which
                    # made the former 'self.totalpages+1' raise TypeError.
                    pages = int(self.totalpages) + 1

            #fetch all tr except first one (column head)
            for torrent in torrentssoup.fetch('tr')[1:]:
                #exclude non music torrents
                if torrent.find('td').find('div')['class'][0:10] != 'cats_music':
                    continue

                torrenttag = torrent.find('td').contents[1]['title']
                actionlinks = torrent.findAll('td')[1].find('span').findAll('a')
                torrentdl = actionlinks[0]['href']
                torrentrm = actionlinks[1]['href']
                torrentid = torrentrm[torrentrm.rfind('=') + 1:]
                torrenttd = torrent.findAll('td')[1]

                # remove dataless elements
                torrenttags = torrenttd.div
                rightlinks = torrenttd.span
                torrenttags.extract()
                rightlinks.extract()

                # remove line breaks
                torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")])
                torrenttd = BeautifulSoup(torrenttd)
                isScene = False

                links = torrenttd.findAll('a')
                if len(links) == 2:
                    #one artist
                    torrentartist = (self.utils.decodeHTMLEntities(links[0].string),)
                    artistid = (links[0]['href'][14:],)
                    albumlink = links[1]
                elif len(links) == 1:
                    #various artists
                    torrentartist = ('Various Artists',)
                    artistid = ()
                    albumlink = links[0]
                elif len(links) == 3:
                    #two artists
                    torrentartist = (self.utils.decodeHTMLEntities(links[0].string),
                                     self.utils.decodeHTMLEntities(links[1].string))
                    artistid = (links[0]['href'][14:],
                                links[1]['href'][14:])
                    albumlink = links[2]
                elif torrenttd.find(text=re.compile('performed by')):
                    #performed by
                    torrentartist = (self.utils.decodeHTMLEntities(links[-2].string),)
                    artistid = (links[-2]['href'][14:],)
                    albumlink = links[-1]
                else:
                    # Unknown row layout: skip it rather than reuse the
                    # previous row's artist/album (or hit a NameError).
                    continue

                torrentalbum = albumlink.string
                info = albumlink.nextSibling.string.strip()

                if 'Scene' in info:
                    isScene = True

                # "[1999]" -> "1999"
                torrentyear = regyear.search(info).group(0)[1:5]
                torrentslist.append({'tag': torrenttag,
                                     'dlurl': torrentdl,
                                     'id': torrentid,
                                     'artist': torrentartist,
                                     'artistid': artistid,
                                     'album': self.utils.decodeHTMLEntities(torrentalbum),
                                     'year': torrentyear,
                                     'pages': pages,
                                     'scene': isScene})

        return torrentslist

    def postsList(self, dom):
        """
        Parse a post list page and returns a list of dictionaries, one per post:
        {postid, torrentid, comment, postdate, pages}
        """
        postslist = []
        postssoup = dom.find("div", {"class": "thin"})
        pages = 0

        #if there's at least 1 post in the list
        if postssoup:
            navsoup = dom.find("div", {"class": "linkbox"})

            #if there's more than 1 page of posts
            if navsoup.find("a"):
                lastpage = navsoup.findAll("a")[1]['href']
                pages = lastpage[18:lastpage.find('&')]
                self.totalpages = pages
            else: #we are at the last page, no link
                pages = 1

            for post in postssoup.fetch('table', {'class': 'forum_post box vertical_margin'}):
                commentbody = post.find("td", {"class": "body"})
                headerlinks = post.find("span").findAll("a")
                postid = headerlinks[0].string[1:]
                torrenthref = headerlinks[-1]['href']
                torrentid = torrenthref[torrenthref.rfind('=') + 1:]
                comment = u''.join([text.string for text in commentbody.findAll(text=True)])
                postdate = post.find("span", {"class": "time"})['title']
                postslist.append({'postid': postid,
                                  'torrentid': torrentid,
                                  'comment': comment,
                                  'postdate': postdate,
                                  'pages': pages})

        return postslist

    def whatForm(self, dom, action):
        """
        Parse a what.cd edit page and returns a dict with all form inputs/textareas names and values

        # Parameters:
            * dom str: the edit page dom.
            * action str: the action value from the requested form
        """
        inputs = {}

        # The form is located through its hidden 'action' input.
        form = dom.find('input', {'name': 'action', 'value': action}).parent
        elements = form.fetch(('input', 'textarea'))
        #get all form elements except for submit input
        for element in elements[0:-1]:
            name = element.get('name', None)
            if element.name == 'textarea':
                inputs[name] = element.string
            else:
                inputs[name] = element.get('value', None)
        return inputs


if __name__ == "__main__":
    print("Module to manage what.cd as a web service")