#!/usr/bin/env python
#
# PyGazelle - https://github.com/cohena/pygazelle
# A Python implementation of the What.cd Gazelle JSON API
#
# Loosely based on the API implementation from 'whatbetter', by Zachary Denton
# See https://github.com/zacharydenton/whatbetter

import json
import time
import requests

from user import User
from artist import Artist
from tag import Tag
from request import Request
from torrent_group import TorrentGroup
from torrent import Torrent
from category import Category

class LoginException(Exception):
    pass

class RequestException(Exception):
    pass

class GazelleAPI(object):
    """
    Client for the Gazelle JSON API. Logs in on construction, then exposes
    typed accessors (get_user, get_artist, ...) whose results are cached on
    this object to avoid redundant API calls.
    """
    # Class-level so the rate limit really is shared amongst all api objects.
    last_request = time.time()
    default_headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        # FIX: the continued string literals were joined without spaces,
        # which produced a malformed User-Agent ("...10_7_3)AppleWebKit...").
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) '\
                      'AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.79 '\
                      'Safari/535.11',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9'\
                  ',*/*;q=0.8',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'}


    def __init__(self, username=None, password=None):
        # FIX: requests 1.0 removed session(**kwargs); create the session
        # bare and merge the default headers in, which works on all versions.
        self.session = requests.session()
        self.session.headers.update(self.default_headers)
        self.username = username
        self.password = password
        self.authkey = None
        self.passkey = None
        self.userid = None
        self.logged_in_user = None
        self.cached_users = {}
        self.cached_artists = {}
        self.cached_tags = {}
        self.cached_torrent_groups = {}
        self.cached_torrents = {}
        self.cached_requests = {}
        self.cached_categories = {}
        self.site = "https://what.cd/"
        self.rate_limit = 2.0 # seconds between requests
        self._login()

    def _login(self):
        """
        Private method.
        Logs in user and gets authkey from server.
        Raises LoginException on a non-200 response.
        """
        loginpage = 'https://what.cd/login.php'
        data = {'username': self.username,
                'password': self.password}
        r = self.session.post(loginpage, data=data)
        if r.status_code != 200:
            raise LoginException
        accountinfo = self.request('index')
        self.userid = accountinfo['id']
        self.authkey = accountinfo['authkey']
        self.passkey = accountinfo['passkey']
        self.logged_in_user = User(self.userid, self)
        self.logged_in_user.set_index_data(accountinfo)

    def request(self, action, **kwargs):
        """
        Makes an AJAX request at a given action.
        Pass an action and relevant arguments for that action.
        Raises RequestException when the response is not JSON or reports a
        non-success status.
        """
        ajaxpage = 'ajax.php'
        content = self.unparsed_request(ajaxpage, action, **kwargs)
        try:
            parsed = json.loads(content)
            if parsed['status'] != 'success':
                raise RequestException
            return parsed['response']
        except ValueError:
            raise RequestException

    def unparsed_request(self, page, action, **kwargs):
        """
        Makes a generic HTTP request at a given page with a given action.
        Also pass relevant arguments for that action. Blocks until the shared
        rate limit allows another request.
        """
        # FIX: read and write the timestamp on the class. The original
        # assigned to self.last_request, which created a per-instance
        # attribute shadowing the class value, so the limit was never
        # actually shared between API objects.
        while time.time() - GazelleAPI.last_request < self.rate_limit:
            time.sleep(0.1)

        # FIX: self.site already ends with '/'; the old "%s/%s" produced a
        # double slash in every request URL.
        url = "%s%s" % (self.site, page)
        params = {'action': action}
        if self.authkey:
            params['auth'] = self.authkey
        params.update(kwargs)
        r = self.session.get(url, params=params, allow_redirects=False)
        GazelleAPI.last_request = time.time()
        return r.content

    def get_user(self, id):
        """
        Returns a User for the passed ID, associated with this API object. If the ID references the currently logged in
        user, the user returned will be pre-populated with the information from an 'index' API call. Otherwise, you'll
        need to call User.update_user_data(). This is done on demand to reduce unnecessary API calls.
        """
        id = int(id)
        if id == self.userid:
            return self.logged_in_user
        elif id in self.cached_users:
            return self.cached_users[id]
        else:
            return User(id, self)

    def search_users(self, search_query):
        """
        Returns a list of users returned for the search query. You can search by name, part of name, and ID number. If
        one of the returned users is the currently logged-in user, that user object will be pre-populated with the
        information from an 'index' API call. Otherwise only the limited info returned by the search will be pre-pop'd.
        You can query more information with User.update_user_data(). This is done on demand to reduce unnecessary API calls.
        """
        response = self.request(action='usersearch', search=search_query)
        results = response['results']

        found_users = []
        for result in results:
            user = self.get_user(result['userId'])
            user.set_search_result_data(result)
            found_users.append(user)

        return found_users

    def get_artist(self, id, name=None):
        """
        Returns an Artist for the passed ID, associated with this API object. You'll need to call Artist.update_data()
        if the artist hasn't already been cached. This is done on demand to reduce unnecessary API calls.
        """
        id = int(id)
        if id in self.cached_artists:
            artist = self.cached_artists[id]
        else:
            artist = Artist(id, self)
        if name:
            artist.name = name
        return artist

    def get_tag(self, name):
        """
        Returns a Tag for the passed name, associated with this API object. If you know the count value for this tag,
        pass it to update the object. There is no way to query the count directly from the API, but it can be retrieved
        from other calls such as 'artist', however.
        """
        if name in self.cached_tags:
            return self.cached_tags[name]
        else:
            return Tag(name, self)

    def get_request(self, id):
        """
        Returns a Request for the passed ID, associated with this API object. You'll need to call Request.update_data()
        if the request hasn't already been cached. This is done on demand to reduce unnecessary API calls.
        """
        id = int(id)
        if id in self.cached_requests:
            return self.cached_requests[id]
        else:
            return Request(id, self)

    def get_torrent_group(self, id):
        """
        Returns a TorrentGroup for the passed ID, associated with this API object.
        """
        id = int(id)
        if id in self.cached_torrent_groups:
            return self.cached_torrent_groups[id]
        else:
            return TorrentGroup(id, self)

    def get_torrent(self, id):
        """
        Returns a Torrent for the passed ID, associated with this API object.
        (FIX: docstring previously said TorrentGroup -- copy/paste error.)
        """
        id = int(id)
        if id in self.cached_torrents:
            return self.cached_torrents[id]
        else:
            return Torrent(id, self)

    def get_category(self, id, name=None):
        """
        Returns a Category for the passed ID, associated with this API object.
        """
        id = int(id)
        if id in self.cached_categories:
            cat = self.cached_categories[id]
        else:
            cat = Category(id, self)
        if name:
            cat.name = name
        return cat

    def search_torrents(self, **kwargs):
        """
        Searches based on the args you pass and returns torrent groups filled with torrents.
        Pass strings unless otherwise specified.
        Valid search args:
            searchstr (any arbitrary string to search for)
            page (page to display -- default: 1)
            artistname (self explanatory)
            groupname (torrent group name, equivalent to album)
            recordlabel (self explanatory)
            cataloguenumber (self explanatory)
            year (self explanatory)
            remastertitle (self explanatory)
            remasteryear (self explanatory)
            remasterrecordlabel (self explanatory)
            remastercataloguenumber (self explanatory)
            filelist (can search for filenames found in torrent...unsure of formatting for multiple files)
            encoding (use constants in pygazelle.Encoding module)
            format (use constants in pygazelle.Format module)
            media (use constants in pygazelle.Media module)
            releasetype (use constants in pygazelle.ReleaseType module)
            haslog (int 1 or 0 to represent boolean, 100 for 100% only, -1 for < 100% / unscored)
            hascue (int 1 or 0 to represent boolean)
            scene (int 1 or 0 to represent boolean)
            vanityhouse (int 1 or 0 to represent boolean)
            freetorrent (int 1 or 0 to represent boolean)
            taglist (comma separated tag names)
            tags_type (0 for 'any' matching, 1 for 'all' matching)
            order_by (use constants in pygazelle.order module that start with by_ in their name)
            order_way (use way_ascending or way_descending constants in pygazelle.order)
            filter_cat (for each category you want to search, the param name must be filter_cat[catnum] and the value 1)
                ex. filter_cat[1]=1 turns on Music.
                    filter_cat[1]=1, filter_cat[2]=1 turns on music and applications. (two separate params and vals!)
                Category object ids return the correct int value for these. (verify?)

        Returns a dict containing keys 'curr_page', 'pages', and 'results'. Results contains a matching list of Torrents
        (they have a reference to their parent TorrentGroup).
        """
        response = self.request(action='browse', **kwargs)
        results = response['results']
        if results:
            curr_page = response['currentPage']
            pages = response['pages']
        else:
            curr_page = 1
            pages = 1

        matching_torrents = []
        for torrent_group_dict in results:
            torrent_group = self.get_torrent_group(torrent_group_dict['groupId'])
            torrent_group.set_torrent_search_data(torrent_group_dict)

            for torrent_dict in torrent_group_dict['torrents']:
                torrent_dict['groupId'] = torrent_group.id
                torrent = self.get_torrent(torrent_dict['torrentId'])
                torrent.set_torrent_search_data(torrent_dict)
                matching_torrents.append(torrent)

        return {'curr_page': curr_page, 'pages': pages, 'results': matching_torrents}

    def generate_torrent_link(self, id):
        """Returns the authenticated download URL for the given torrent ID."""
        url = "%storrents.php?action=download&id=%s&authkey=%s&torrent_pass=%s" %\
            (self.site, id, self.logged_in_user.authkey, self.logged_in_user.passkey)
        return url

    def save_torrent_file(self, id, dest):
        """Downloads the .torrent file for the given torrent ID into dest."""
        file_data = self.unparsed_request("torrents.php", 'download',
            id=id, authkey=self.logged_in_user.authkey, torrent_pass=self.logged_in_user.passkey)
        # FIX: open in binary mode -- .torrent files are bencoded binary
        # data, and r.content is a byte string.
        with open(dest, 'wb') as dest_file:
            dest_file.write(file_data)
class InvalidArtistException(Exception):
    pass

class Artist(object):
    """
    This class represents an Artist. It is created knowing only its ID. To reduce API accesses, load information using
    Artist.update_data() only as needed.
    """
    def __init__(self, id, parent_api):
        self.id = id
        self.parent_api = parent_api
        self.name = None
        self.notifications_enabled = None
        self.has_bookmarked = None
        self.image = None
        self.body = None
        self.vanity_house = None
        self.tags = []
        self.similar_artists_and_score = {}
        self.statistics = None
        self.torrent_groups = []
        self.requests = []

        self.parent_api.cached_artists[self.id] = self # add self to cache of known Artist objects

    def update_data(self):
        """Fetches this artist's full record with the 'artist' API action."""
        response = self.parent_api.request(action='artist', id=self.id)
        self.set_data(response)

    def set_data(self, artist_json_response):
        """
        Takes parsed JSON from an 'artist' API call and populates this object.
        Raises InvalidArtistException if the response belongs to a different id.
        """
        if self.id != artist_json_response['id']:
            # FIX: typo "artists's" -> "artist's" in the error message.
            raise InvalidArtistException("Tried to update an artist's information from an 'artist' API call with a different id." +
                                         " Should be %s, got %s" % (self.id, artist_json_response['id']) )

        self.name = artist_json_response['name']
        self.notifications_enabled = artist_json_response['notificationsEnabled']
        self.has_bookmarked = artist_json_response['hasBookmarked']
        self.image = artist_json_response['image']
        self.body = artist_json_response['body']
        self.vanity_house = artist_json_response['vanityHouse']

        self.tags = []
        for tag_dict in artist_json_response['tags']:
            tag = self.parent_api.get_tag(tag_dict['name'])
            tag.set_artist_count(self, tag_dict['count'])
            self.tags.append(tag)

        self.similar_artists_and_score = {}
        for similar_artist_dict in artist_json_response['similarArtists']:
            similar_artist = self.parent_api.get_artist(similar_artist_dict['artistId'])
            similar_artist.name = similar_artist_dict['name']
            self.similar_artists_and_score[similar_artist] = similar_artist_dict['score']

        self.statistics = artist_json_response['statistics']

        self.torrent_groups = []
        for torrent_group_item in artist_json_response['torrentgroup']:
            torrent_group = self.parent_api.get_torrent_group(torrent_group_item['groupId'])
            torrent_group.set_artist_group_data(torrent_group_item)
            self.torrent_groups.append(torrent_group)

        self.requests = []
        for request_json_item in artist_json_response['requests']:
            request = self.parent_api.get_request(request_json_item['requestId'])
            request.set_data(request_json_item)
            self.requests.append(request)

    def __repr__(self):
        return "Artist: %s - ID: %s" % (self.name, self.id)


# --- lib/pygazelle/category.py ---

class InvalidCategoryException(Exception):
    pass

class Category(object):
    """A site content category (Music, Applications, ...), known by ID."""
    def __init__(self, id, parent_api):
        self.id = id
        self.parent_api = parent_api
        self.name = None

        self.parent_api.cached_categories[self.id] = self # add self to cache of known Category objects

    def __repr__(self):
        # FIX: use "ID:" for consistency with every other __repr__ in the package.
        return "Category: %s - ID: %s" % (self.name, self.id)


# --- lib/pygazelle/encoding.py ---
# Encoding strings as the Gazelle API reports them.

C192 = "192"
APS = "APS (VBR)"
V2 = "V2 (VBR)"
V1 = "V1 (VBR)"
C256 = "256"
APX = "APX (VBR)"
V0 = "V0 (VBR)"
C320 = "320"
LOSSLESS = "Lossless"
LOSSLESS_24 = "24bit Lossless"
V8 = "V8 (VBR)"

ALL_ENCODINGS = [C192, APS, V2, V1, C256, APX, V0, C320, LOSSLESS, LOSSLESS_24, V8]


# --- lib/pygazelle/format.py ---
# File format strings as the Gazelle API reports them.

MP3 = "MP3"
FLAC = "FLAC"
AAC = "AAC"
AC3 = "AC3"
DTS = "DTS"
OGG_VORBIS = "Ogg Vorbis"

ALL_FORMATS = [MP3, FLAC, AAC, AC3, DTS, OGG_VORBIS]
# --- lib/pygazelle/media.py ---
# Media strings as the Gazelle API reports them.

CD = "CD"
DVD = "DVD"
VINYL = "Vinyl"
SOUNDBOARD = "Soundboard"
SACD = "SACD"
DAT = "DAT"
# FIX: the site spells this media type "Cassette"; the original value
# "Casette" could never match anything the API returns.
CASETTE = "Cassette"
CASSETTE = CASETTE  # correctly spelled alias; CASETTE kept for back-compat
WEB = "WEB"
BLU_RAY = "Blu-ray"

ALL_MEDIAS = [CD, DVD, VINYL, SOUNDBOARD, SACD, DAT, CASETTE, WEB, BLU_RAY]


# --- lib/pygazelle/release_type.py ---
# Release type strings; their 1-based position matches the API's int codes.

ALBUM = "Album"
SOUNDTRACK = "Soundtrack"
EP = "EP"
ANTHOLOGY = "Anthology"
COMPILATION = "Compilation"
DJ_MIX = "DJ Mix"
SINGLE = "Single"
LIVE_ALBUM = "Live album"
REMIX = "Remix"
BOOTLEG = "Bootleg"
INTERVIEW = "Interview"
MIXTAPE = "Mixtape"
UNKNOWN = "Unknown"

ALL_RELEASE_TYPES = [ALBUM, SOUNDTRACK, EP, ANTHOLOGY, COMPILATION, DJ_MIX, SINGLE, LIVE_ALBUM, REMIX, BOOTLEG,
                     INTERVIEW, MIXTAPE, UNKNOWN]

def get_int_val(release_type):
    """Returns the 1-based integer code the API uses for a release type string."""
    return ALL_RELEASE_TYPES.index(release_type) + 1


# --- lib/pygazelle/request.py ---

class InvalidRequestException(Exception):
    pass

class Request(object):
    """A user-filed request on the site, known by ID. Populate with set_data()."""
    def __init__(self, id, parent_api):
        self.id = id
        self.parent_api = parent_api
        self.category = None
        self.title = None
        self.year = None
        self.time_added = None
        self.votes = None
        self.bounty = None

        self.parent_api.cached_requests[self.id] = self # add self to cache of known Request objects

    def set_data(self, request_item_json_data):
        """
        Takes a request JSON item (e.g. from an 'artist' call) and populates this object.
        Raises InvalidRequestException if the item belongs to a different id.
        """
        if self.id != request_item_json_data['requestId']:
            raise InvalidRequestException("Tried to update a Request's information from a request JSON item with a different id." +
                                          " Should be %s, got %s" % (self.id, request_item_json_data['requestId']) )
        self.category = self.parent_api.get_category(request_item_json_data['categoryId'])
        self.title = request_item_json_data['title']
        self.year = request_item_json_data['year']
        self.time_added = request_item_json_data['timeAdded']
        self.votes = request_item_json_data['votes']
        self.bounty = request_item_json_data['bounty']

    def __repr__(self):
        return "Request: %s - ID: %s" % (self.title, self.id)
+ """ + self.artist_counts[artist] = count + + def __repr__(self): + return "Tag: %s" % self.name \ No newline at end of file diff --git a/lib/pygazelle/torrent.py b/lib/pygazelle/torrent.py new file mode 100644 index 00000000..01d528d0 --- /dev/null +++ b/lib/pygazelle/torrent.py @@ -0,0 +1,128 @@ +import re + +class InvalidTorrentException(Exception): + pass + +class Torrent(object): + def __init__(self, id, parent_api): + self.id = id + self.parent_api = parent_api + self.group = None + self.media = None + self.format = None + self.encoding = None + self.remaster_year = None + self.remastered = None + self.remaster_title = None + self.remaster_record_label = None + self.remaster_catalogue_number = None + self.scene = None + self.has_log = None + self.has_cue = None + self.log_score = None + self.file_count = None + self.free_torrent = None + self.size = None + self.leechers = None + self.seeders = None + self.snatched = None + self.time = None + self.has_file = None + self.description = None + self.file_list = [] + self.file_path = None + self.user = None + + self.parent_api.cached_torrents[self.id] = self + + def set_torrent_artist_data(self, artist_torrent_json_response): + if self.id != artist_torrent_json_response['id']: + raise InvalidTorrentException("Tried to update a Torrent's information from an 'artist' API call with a different id." 
+ + " Should be %s, got %s" % (self.id, artist_torrent_json_response['id']) ) + + self.group = self.parent_api.get_torrent_group(artist_torrent_json_response['groupId']) + self.media = artist_torrent_json_response['media'] + self.format = artist_torrent_json_response['format'] + self.encoding = artist_torrent_json_response['encoding'] + self.remaster_year = artist_torrent_json_response['remasterYear'] + self.remastered = artist_torrent_json_response['remastered'] + self.remaster_title = artist_torrent_json_response['remasterTitle'] + self.remaster_record_label = artist_torrent_json_response['remasterRecordLabel'] + self.scene = artist_torrent_json_response['scene'] + self.has_log = artist_torrent_json_response['hasLog'] + self.has_cue = artist_torrent_json_response['hasCue'] + self.log_score = artist_torrent_json_response['logScore'] + self.file_count = artist_torrent_json_response['fileCount'] + self.free_torrent = artist_torrent_json_response['freeTorrent'] + self.size = artist_torrent_json_response['size'] + self.leechers = artist_torrent_json_response['leechers'] + self.seeders = artist_torrent_json_response['seeders'] + self.snatched = artist_torrent_json_response['snatched'] + self.time = artist_torrent_json_response['time'] + self.has_file = artist_torrent_json_response['hasFile'] + + def set_torrent_group_data(self, group_torrent_json_response): + if self.id != group_torrent_json_response['id']: + raise InvalidTorrentException("Tried to update a Torrent's information from a 'torrentgroup' API call with a different id." 
+ + " Should be %s, got %s" % (self.id, group_torrent_json_response['id']) ) + + self.group = self.parent_api.get_torrent_group(group_torrent_json_response['groupId']) + self.media = group_torrent_json_response['media'] + self.format = group_torrent_json_response['format'] + self.encoding = group_torrent_json_response['encoding'] + self.remastered = group_torrent_json_response['remastered'] + self.remaster_year = group_torrent_json_response['remasterYear'] + self.remaster_title = group_torrent_json_response['remasterTitle'] + self.remaster_record_label = group_torrent_json_response['remasterRecordLabel'] + self.remaster_catalogue_number = group_torrent_json_response['remasterCatalogueNumber'] + self.scene = group_torrent_json_response['scene'] + self.has_log = group_torrent_json_response['hasLog'] + self.has_cue = group_torrent_json_response['hasCue'] + self.log_score = group_torrent_json_response['logScore'] + self.file_count = group_torrent_json_response['fileCount'] + self.size = group_torrent_json_response['size'] + self.seeders = group_torrent_json_response['seeders'] + self.leechers = group_torrent_json_response['leechers'] + self.snatched = group_torrent_json_response['snatched'] + self.free_torrent = group_torrent_json_response['freeTorrent'] + self.time = group_torrent_json_response['time'] + self.description = group_torrent_json_response['description'] + self.file_list = [ re.match("(.+){{{(\d+)}}}", item).groups() + for item in group_torrent_json_response['fileList'].split("|||") ] # tuple ( filename, filesize ) + self.file_path = group_torrent_json_response['filePath'] + self.user = self.parent_api.get_user(group_torrent_json_response['userId']) + + def set_torrent_search_data(self, search_torrent_json_response): + if self.id != search_torrent_json_response['torrentId']: + raise InvalidTorrentException("Tried to update a Torrent's information from a 'browse'/search API call with a different id." 
+ + " Should be %s, got %s" % (self.id, search_torrent_json_response['torrentId']) ) + + # TODO: Add conditionals to handle torrents that aren't music + self.group = self.parent_api.get_torrent_group(search_torrent_json_response['groupId']) + self.remastered = search_torrent_json_response['remastered'] + self.remaster_year = search_torrent_json_response['remasterYear'] + self.remaster_title = search_torrent_json_response['remasterTitle'] + self.remaster_catalogue_number = search_torrent_json_response['remasterCatalogueNumber'] + self.media = search_torrent_json_response['media'] + self.format = search_torrent_json_response['format'] + self.encoding = search_torrent_json_response['encoding'] + self.has_log = search_torrent_json_response['hasLog'] + self.has_cue = search_torrent_json_response['hasCue'] + self.log_score = search_torrent_json_response['logScore'] + self.scene = search_torrent_json_response['scene'] + self.file_count = search_torrent_json_response['fileCount'] + self.size = search_torrent_json_response['size'] + self.seeders = search_torrent_json_response['seeders'] + self.leechers = search_torrent_json_response['leechers'] + self.snatched = search_torrent_json_response['snatches'] + self.free_torrent = search_torrent_json_response['isFreeleech'] or search_torrent_json_response['isPersonalFreeleech'] + self.time = search_torrent_json_response['time'] + + + + def __repr__(self): + if self.group: + groupname = self.group.name + else: + groupname = "Unknown Group" + return "Torrent: %s - %s - ID: %s" % (groupname, self.encoding, self.id) \ No newline at end of file diff --git a/lib/pygazelle/torrent_group.py b/lib/pygazelle/torrent_group.py new file mode 100644 index 00000000..cb47af61 --- /dev/null +++ b/lib/pygazelle/torrent_group.py @@ -0,0 +1,135 @@ +from torrent import Torrent + +class InvalidTorrentGroupException(Exception): + pass + +class TorrentGroup(object): + """ + Represents a Torrent Group (usually an album). 
class InvalidTorrentGroupException(Exception):
    pass

class TorrentGroup(object):
    """
    Represents a Torrent Group (usually an album). Note that TorrentGroup.torrents may not be comprehensive if you
    haven't called TorrentGroup.update_group_data()...it may have only been populated with filtered search results.
    Check TorrentGroup.has_complete_torrent_list (boolean) to be sure.
    """
    def __init__(self, id, parent_api):
        self.id = id
        self.parent_api = parent_api
        self.name = None
        self.wiki_body = None
        self.wiki_image = None
        self.year = None
        self.record_label = None
        self.catalogue_number = None
        self.tags = []
        self.release_type = None
        self.vanity_house = None
        self.has_bookmarked = None
        self.category = None
        self.time = None
        self.music_info = None
        self.torrents = []
        self.has_complete_torrent_list = False

        self.parent_api.cached_torrent_groups[self.id] = self

    def update_group_data(self):
        """Fetches this group's full record with the 'torrentgroup' API action."""
        response = self.parent_api.request(action='torrentgroup', id=self.id)
        self.set_group_data(response)

    def set_group_data(self, torrent_group_json_response):
        """
        Takes parsed JSON response from 'torrentgroup' action on api, and updates relevant information.
        To avoid problems, only pass in data from an API call that used this torrentgroup's ID as an argument.
        """
        if self.id != torrent_group_json_response['group']['id']:
            # FIX: the error path read ['group']['groupId'], but the key is
            # 'id' -- raising the exception itself raised KeyError. Also say
            # 'torrentgroup', not 'artist': that is the call this handles.
            raise InvalidTorrentGroupException("Tried to update a TorrentGroup's information from a 'torrentgroup' API call with a different id." +
                                               " Should be %s, got %s" % (self.id, torrent_group_json_response['group']['id']) )

        self.name = torrent_group_json_response['group']['name']
        self.year = torrent_group_json_response['group']['year']
        self.wiki_body = torrent_group_json_response['group']['wikiBody']
        self.wiki_image = torrent_group_json_response['group']['wikiImage']
        self.record_label = torrent_group_json_response['group']['recordLabel']
        self.catalogue_number = torrent_group_json_response['group']['catalogueNumber']

        self.release_type = torrent_group_json_response['group']['releaseType']
        self.category = self.parent_api.get_category(torrent_group_json_response['group']['categoryId'],
                                                     torrent_group_json_response['group']['categoryName'])
        self.time = torrent_group_json_response['group']['time']
        self.vanity_house = torrent_group_json_response['group']['vanityHouse']

        self.music_info = torrent_group_json_response['group']['musicInfo']
        self.music_info['artists'] = [ self.parent_api.get_artist(artist['id'], artist['name'])
                                       for artist in self.music_info['artists'] ]
        self.music_info['with'] = [ self.parent_api.get_artist(artist['id'], artist['name'])
                                    for artist in self.music_info['with'] ]

        self.torrents = []
        for torrent_dict in torrent_group_json_response['torrents']:
            torrent_dict['groupId'] = self.id
            torrent = self.parent_api.get_torrent(torrent_dict['id'])
            torrent.set_torrent_group_data(torrent_dict)
            self.torrents.append(torrent)
        self.has_complete_torrent_list = True

    def set_artist_group_data(self, artist_group_json_response):
        """
        Takes torrentgroup section from parsed JSON response from 'artist' action on api, and updates relevant information.
        """
        if self.id != artist_group_json_response['groupId']:
            raise InvalidTorrentGroupException("Tried to update a TorrentGroup's information from an 'artist' API call with a different id." +
                                               " Should be %s, got %s" % (self.id, artist_group_json_response['groupId']) )

        self.name = artist_group_json_response['groupName']
        self.year = artist_group_json_response['groupYear']
        self.record_label = artist_group_json_response['groupRecordLabel']
        self.catalogue_number = artist_group_json_response['groupCatalogueNumber']

        self.tags = []
        for tag_name in artist_group_json_response['tags']:
            tag = self.parent_api.get_tag(tag_name)
            self.tags.append(tag)

        self.release_type = artist_group_json_response['releaseType']
        self.has_bookmarked = artist_group_json_response['hasBookmarked']

        self.torrents = []
        for torrent_dict in artist_group_json_response['torrent']:
            torrent = self.parent_api.get_torrent(torrent_dict['id'])
            torrent.set_torrent_artist_data(torrent_dict)
            self.torrents.append(torrent)
        self.has_complete_torrent_list = True

    def set_torrent_search_data(self, search_json_response):
        """
        Takes a result item from a 'browse' (search) API call and updates the subset of group info it carries.
        Torrents referenced by the result are appended to self.torrents but not populated here.
        """
        if self.id != search_json_response['groupId']:
            raise InvalidTorrentGroupException("Tried to update a TorrentGroup's information from a 'browse'/search API call with a different id." +
                                               " Should be %s, got %s" % (self.id, search_json_response['groupId']) )

        self.name = search_json_response['groupName']
        # purposefully ignoring search_json_response['artist']...the other data updates don't include it, would just get confusing
        self.tags = []
        for tag_name in search_json_response['tags']:
            tag = self.parent_api.get_tag(tag_name)
            self.tags.append(tag)
        # some of the below keys aren't in things like comics...should probably watch out for this elsewhere
        if 'bookmarked' in search_json_response:
            self.has_bookmarked = search_json_response['bookmarked']
        if 'vanityHouse' in search_json_response:
            self.vanity_house = search_json_response['vanityHouse']
        if 'groupYear' in search_json_response:
            self.year = search_json_response['groupYear']
        if 'releaseType' in search_json_response:
            self.release_type = search_json_response['releaseType']
        self.time = search_json_response['groupTime']
        # single-torrent results carry a bare torrentId; normalize to a list
        if 'torrentId' in search_json_response:
            search_json_response['torrents'] = [{'torrentId': search_json_response['torrentId']}]

        new_torrents = []
        for torrent_dict in search_json_response['torrents']:
            torrent_dict['groupId'] = self.id
            torrent = self.parent_api.get_torrent(torrent_dict['torrentId'])
            new_torrents.append(torrent)
            # torrent information gets populated in API search call, no need to duplicate that here
        self.torrents = self.torrents + new_torrents

    def __repr__(self):
        return "TorrentGroup: %s - ID: %s" % (self.name, self.id)
To reduce + API accesses, load information using User.update_index_data() or User.update_user_data only as needed. + """ + def __init__(self, id, parent_api): + self.id = id + self.parent_api = parent_api + self.username = None + self.authkey = None + self.passkey = None + self.avatar = None + self.is_friend = None + self.profile_text = None + self.notifications = None + self.stats = None + self.ranks = None + self.personal = None + self.community = None + + self.parent_api.cached_users[self.id] = self # add self to cache of known User objects + + def update_index_data(self): + """ + Calls 'index' API action, then updates this User objects information with it. + NOTE: Only call if this user is the logged-in user...throws InvalidUserException otherwise. + """ + response = self.parent_api.request(action='index') + self.set_index_data(response) + + def set_index_data(self, index_json_response): + """ + Takes parsed JSON response from 'index' action on api, and updates the available subset of user information. + ONLY callable if this User object represents the currently logged in user. Throws InvalidUserException otherwise. + """ + if self.id != index_json_response['id']: + raise InvalidUserException("Tried to update non-logged-in User's information from 'index' API call." 
+ + " Should be %s, got %s" % (self.id, index_json_response['id']) ) + + self.username = index_json_response['username'] + + self.authkey = index_json_response['authkey'] + self.passkey = index_json_response['passkey'] + self.notifications = index_json_response['notifications'] + if self.stats: + self.stats = dict(self.stats.items() + index_json_response['userstats'].items()) # merge in new info + else: + self.stats = index_json_response['userstats'] + + # cross pollinate some data that is located in multiple locations in API + if self.personal: + self.personal['class'] = self.stats['class'] + self.personal['passkey'] = self.passkey + + + def update_user_data(self): + response = self.parent_api.request(action='user', id=self.id) + self.set_user_data(response) + + def set_user_data(self, user_json_response): + """ + Takes parsed JSON response from 'user' action on api, and updates relevant user information. + To avoid problems, only pass in user data from an API call that used this user's ID as an argument. + """ + if self.id != user_json_response['id']: + raise InvalidUserException("Tried to update a user's information from a 'user' API call with a different id." 
+ + " Should be %s, got %s" % (self.id, user_json_response['id']) ) + + self.username = user_json_response['username'] + self.avatar = user_json_response['avatar'] + self.is_friend = user_json_response['isFriend'] + self.profile_text = user_json_response['profileText'] + if self.stats: + self.stats = dict(self.stats.items() + user_json_response['stats'].items()) # merge in new info + else: + self.stats = user_json_response['stats'] + self.ranks = user_json_response['ranks'] + self.personal = user_json_response['personal'] + self.community = user_json_response['community'] + + # cross pollinate some data that is located in multiple locations in API + self.stats['class'] = self.personal['class'] + self.passkey = self.personal['passkey'] + + def set_search_result_data(self, search_result_item): + """ + Takes a single user result item from a 'usersearch' API call and updates user info. + """ + if self.id != search_result_item['userId']: + raise InvalidUserException("Tried to update existing user with another user's search result data (IDs don't match).") + + self.username = search_result_item['username'] + + if not self.personal: + self.personal = {} + + self.personal['donor'] = search_result_item['donor'] + self.personal['warned'] = search_result_item['warned'] + self.personal['enabled'] = search_result_item['enabled'] + self.personal['class'] = search_result_item['class'] + + def __repr__(self): + return "User: %s - ID: %s" % (self.username, self.id) + +#URL: +#ajax.php?action=usersearch +#Argument: +#search - The search term. +#{ +# "status": "success", +# "response": { +# "currentPage": 1, +# "pages": 1, +# "results": [ +# { +# "userId": 469, +# "username": "dr4g0n", +# "donor": true, +# "warned": false, +# "enabled": true, +# "class": "VIP" +# }, +# // ... 
+# ] +# } +#} + +#URL: +#ajax.php?action=user +# +#Arguments: +#id - id of the user to display +# +#Response format: +#{ +# "status": "success", +# "response": { +# "username": "xxxx", +# "avatar": "http://asdf.com/asdf.png", +# "isFriend": false, +# "profileText": "", +# "stats": { +# "joinedDate": "2007-10-28 14:26:12", +# "lastAccess": "2012-08-09 00:17:52", +# "uploaded": 585564424629, +# "downloaded": 177461229738, +# "ratio": 3.3, +# "requiredRatio": 0.6 +# }, +# "ranks": { +# "uploaded": 98, +# "downloaded": 95, +# "uploads": 85, +# "requests": 0, +# "bounty": 79, +# "posts": 98, +# "artists": 0, +# "overall": 85 +# }, +# "personal": { +# "class": "VIP", +# "paranoia": 0, +# "paranoiaText": "Off", +# "donor": true, +# "warned": false, +# "enabled": true, +# "passkey": "31d59d20ac9233bf2038e35e72c4d61e" +# }, +# "community": { +# "posts": 863, +# "torrentComments": 13, +# "collagesStarted": 0, +# "collagesContrib": 0, +# "requestsFilled": 0, +# "requestsVoted": 13, +# "perfectFlacs": 2, +# "uploaded": 29, +# "groups": 14, +# "seeding": 309, +# "leeching": 0, +# "snatched": 678, +# "invited": 7 +# } +# } +#} + +#URL: +#ajax.php?action=index +# +#Arguments: None +#{ +# "status": "success", +# "response": { +# "username": "xxxx", +# "id": 0000, +# "authkey": "redacted", +# "passkey": "redacted", +# "notifications": { +# "messages": 0, +# "notifications": 9000, +# "newAnnouncement": false, +# "newBlog": false +# }, +# "userstats": { +# "uploaded": 585564424629, +# "downloaded": 177461229738, +# "ratio": 3.29, +# "requiredratio": 0.6, +# "class": "VIP" +# } +# } +#} diff --git a/lib/whatapi.py b/lib/whatapi.py deleted file mode 100755 index ecdcd5cb..00000000 --- a/lib/whatapi.py +++ /dev/null @@ -1,1495 +0,0 @@ -# -*- coding: utf_8 -*- -################################################################################# -# -# Name: whatapi.py -# -# Synopsis: Module to manage what.cd as a web service -# -# Description: See below list of the implemented webservices -# 
-# Copyright 2010 devilcius -# -# The Wide Open License (WOL) -# -# Permission to use, copy, modify, distribute and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice and this license appear in all source copies. -# THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF -# ANY KIND. See http://www.dspguru.com/wide-open-license for more information. -# -################################################################################# - - -__author__ = "devilcius" -__date__ = "$Oct 23, 2010 11:21:12 PM$" - - -import hashlib -try: - from BeautifulSoup import BeautifulSoup, SoupStrainer -except: - raise ImportError, "Please install BeautifulSoup 3.2 module from http://www.crummy.com/software/BeautifulSoup/#Download" -import httplib -import os -import pickle -import re -import urllib -import shelve -import tempfile -import threading -from htmlentitydefs import name2codepoint as n2cp - - -""" -A list of the implemented webservices (from what.cd ) -===================================== - -# User - - * user.getUserId - * user.getInfo - - * user.getTorrentsSeeding - * user.getTorrentsSnatched - * user.getTorrentsUploaded - * user.getTorrentsCommented - - * user.specificUserInfo - Atributes: - ######## stats ########### - -joindate - -lastseen - -dataup - -datadown - -ratio - -rratio - ######## percentile ########### - -uppercentile - -downpercentile - -torrentsuppercentile - -reqfilledpercentile - -bountyspentpercentile - -postsmadepercentile - -artistsaddedpercentile - -overallpercentile - ######## community ########### - -postsmade - -torrentscomments - -collagesstarted - -collagescontr - -reqfilled - -reqvoted - -uploaded - -unique - -perfect - -seeding - -leeching - -snatched - -invited - -artistsadded - - -# Artist - - * artist.getArtistReleases - * artist.getArtistImage - * artist.getArtistInfo - * artist.getArtistTags - * artist.getArtistSimilar - * 
artist.getArtistRequests - - + artist.setArtistInfo - - -# Torrent - - * torrent.getTorrentParentId - * torrent.getTorrentDownloadURL - * torrent.getTorrentDetails - * torrent.getTorrentSize - * torrent.getTorrentSnatched - * torrent.getTorrentSeeders - * torrent.getTorrentLeechers - * torrent.getTorrentUploadedBy - * torrent.getTorrentFolderName - * torrent.getTorrentFileList - * torrent.getTorrentDescription - * torrent.getTorrentComments - * torrent.isTorrentFreeLeech - * torrent.isTorrentReported - - -# Authenticate - - * authenticate.getAuthenticatedUserId - * authenticate.getAuthenticatedUserAuthCode - * authenticate.getAuthenticatedUserDownload - * authenticate.getAuthenticatedUserUpload() - * authenticate.getAuthenticatedUserRatio - * authenticate.getAuthenticatedUserRequiredRatio - -""" - -class ResponseBody: - """A Response Body Object""" - pass - -class SpecificInformation: - """A Specific Information Object""" - pass - - -class WhatBase(object): - """An abstract webservices object.""" - whatcd = None - - def __init__(self, whatcd): - self.whatcd = whatcd - #if we are not autenticated in what.cd, do it now - if not self.whatcd.isAuthenticated(): - print "authenticating..." 
- self.whatcd.headers = Authenticate(self.whatcd).getAuthenticatedHeader() - - def _request(self, type, path, data, headers): - return Request(self.whatcd, type, path, data, headers) - - def _parser(self): - return Parser(self.whatcd) - - def utils(self): - return Utils() - - -class Utils(): - - def md5(self, text): - """Returns the md5 hash of a string.""" - - h = hashlib.md5() - h.update(self._string(text)) - - return h.hexdigest() - - def _unicode(self, text): - if type(text) == unicode: - return text - - if type(text) == int: - return unicode(text) - - return unicode(text, "utf-8") - - def _string(self, text): - if type(text) == str: - return text - - if type(text) == int: - return str(text) - - return text.encode("utf-8") - - def _number(self, string): - """ - Extracts an int from a string. Returns a 0 if None or an empty string was passed - """ - - if not string: - return 0 - elif string == "": - return 0 - else: - try: - return int(string) - except ValueError: - return float(string) - - def substituteEntity(self, match): - ent = match.group(2) - if match.group(1) == "#": - return unichr(int(ent)) - else: - cp = n2cp.get(ent) - - if cp: - return unichr(cp) - else: - return match.group() - - def decodeHTMLEntities(self, string): - entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") - return entity_re.subn(self.substituteEntity, string)[0] - - - -class WhatCD(object): - - def __init__(self, username, password, site, loginpage, headers): - - #credentials - self.username = username - self.password = password - self.site = site - self.loginpage = loginpage - self.headers = headers - self.authenticateduserinfo = {} - - self.cache_backend = None - self.proxy_enabled = False - self.proxy = None - - def isAuthenticated(self): - """ - Checks if we are authenticated in what.cd - """ - if "id" in self.authenticateduserinfo: - return True - else: - return False - - def getCredentials(self): - """ - Returns an authenticated user credentials object - """ - return 
Authenticate(self) - - - def getUser(self, username): - """ - Returns an user object - """ - return User(username, self) - - def getTorrent(self, id, page=1): - """ - Returns a torrent object - """ - return Torrent(id, page, None, self) - - def getTorrentGroup(self, id, page=1): - """ - Returns a torrent object - """ - return Torrent(id, page, True, self) - - def getArtist(self, name): - """ - Returns an artist object - """ - return Artist(name, self) - - def enableProxy(self, host, port): - """Enable a default web proxy""" - self.proxy = [host, Utils()._number(port)] - self.proxy_enabled = True - - def disableProxy(self): - """Disable using the web proxy""" - self.proxy_enabled = False - - def isProxyEnabled(self): - """Returns True if a web proxy is enabled.""" - return self.proxy_enabled - - def getProxy(self): - """Returns proxy details.""" - return self.proxy - - def enableCaching(self, file_path=None): - """Enables caching request-wide for all cachable calls. - * file_path: A file path for the backend storage file. If - None set, a temp file would probably be created, according the backend. 
- """ - if not file_path: - file_path = tempfile.mktemp(prefix="whatapi_tmp_") - - self.cache_backend = _ShelfCacheBackend(file_path) - - def disableCaching(self): - """Disables all caching features.""" - self.cache_backend = None - - def isCachingEnabled(self): - """Returns True if caching is enabled.""" - - return not (self.cache_backend == None) - - def getCacheBackend(self): - - return self.cache_backend - -def getWhatcdNetwork(username="", password=""): - """ - Returns a preconfigured WhatCD object for what.cd - # Parameters: - * username str: a username of a valid what.cd user - * password str: user's password - """ - - return WhatCD ( - username=username, - password=password, - site="ssl.what.cd", - loginpage="/login.php", - headers={ - "Content-type": "application/x-www-form-urlencoded", - 'Accept-Charset': 'utf-8', - 'User-Agent': "whatapi [devilcius]" - }) - - - -class _ShelfCacheBackend(object): - """Used as a backend for caching cacheable requests.""" - cache_lock = threading.Lock() - - def __init__(self, file_path=None): - self.shelf = shelve.open(file_path) - - def getHTML(self, key): - with _ShelfCacheBackend.cache_lock: - return self.shelf[key] - - def setHTML(self, key, xml_string): - with _ShelfCacheBackend.cache_lock: - self.shelf[key] = xml_string - - def hasKey(self, key): - with _ShelfCacheBackend.cache_lock: - return key in self.shelf.keys() - - -class Request(object): - """web service operation.""" - - def __init__(self, whatcd, type, path, data, headers): - - self.whatcd = whatcd - self.utils = Utils() - self.type = type - self.path = path - self.data = data - self.headers = headers - #enable catching? 
- if whatcd.isCachingEnabled(): - self.cache = whatcd.getCacheBackend() - - def getCacheKey(self): - """The cache key is a md5 hash of request params.""" - - key = self.type + self.path + self.data - return Utils().md5(key) - - def getCachedResponse(self): - """Returns a file object of the cached response.""" - - if not self.isCached(): - response = self.downloadResponse() - self.cache.setHTML(self.getCacheKey(), response) - return self.cache.getHTML(self.getCacheKey()) - - def isCached(self): - """Returns True if the request is already in cache.""" - - return self.cache.hasKey(self.getCacheKey()) - - def downloadResponse(self): - """Returns a ResponseBody object from the server.""" - - #print "downloading from %s" % (self.path) - conn = httplib.HTTPSConnection(self.whatcd.site) - rb = ResponseBody() - - if self.whatcd.isProxyEnabled(): - conn = httplib.HTTPSConnection(host=self.whatcd.getProxy()[0], port=self.whatcd.getProxy()[1]) - conn.request(method=self.type, url="https://" + self.whatcd.site + self.path, body=self.data, headers=self.headers) - else: - conn.request(self.type, self.path, self.data, self.headers) - - response = conn.getresponse() - rb.headers = response.getheaders() - # Rip all inline JavaScript out of the response in case it hasn't been properly escaped - rb.body = re.sub('', '', response.read()) - conn.close() - return rb - - def execute(self, cacheable=False): - """Depending if caching is enabled, returns response from the server or, if available, the cached response""" - if self.whatcd.isCachingEnabled() and cacheable: - response = self.getCachedResponse() - else: - response = self.downloadResponse() - - return response - -class Authenticate(WhatBase): - - def __init__(self, whatcd): - """Create an authenticated user object. - # Parameters: - * whatcd object: WhatCD object. 
- """ - self.whatcd = whatcd - self.parser = Parser(whatcd) - if not self.whatcd.isAuthenticated(): - self.getAuthenticatedHeader() - - def setCookie(self): - print "creating cookie" - f = open('cookie', 'w') - loginform = {'username': self.whatcd.username, 'password': self.whatcd.password\ - , 'keeplogged': '1', 'login': 'Login'} - data = urllib.urlencode(loginform) - response = self._request("POST", self.whatcd.loginpage, data, self.whatcd.headers).execute(True) - try: - cookie = dict(response.headers)['set-cookie'] - session = re.search("session=[^;]+", cookie).group(0) - self.whatcd.headers["Cookie"] = session - homepage = response.body - pickle.dump(self.whatcd.headers, f) - except (KeyError, AttributeError): - f.close() - os.remove('cookie') - self.whatcd.headers = None - raise Exception("Login failed, most likely bad creds or the site is down, nothing to do") - f.close() - - - def getAuthenticatedHeader(self): - """ - Log user in what.cd and returns the authenticated header - """ - homepage = None - if os.path.exists("cookie"): - f = open("cookie", "r") - try: - self.whatcd.headers = pickle.load(f) - except EOFError: - f.close() - os.remove("cookie") - print "invalid cookie, removed" - self.setCookie() - else: - self.setCookie() - #set authenticated user info - if 'id' not in self.whatcd.authenticateduserinfo: - self.whatcd.authenticateduserinfo = self.getAuthenticatedUserInfo(homepage) - - return self.whatcd.headers - - def getAuthenticatedUserInfo(self, homepage=None): - """ - Returns authenticated user's info - """ - if not homepage: - homepage = BeautifulSoup(self._request("GET", "/index.php", "", self.whatcd.headers).execute(True).body) - authuserinfo = self._parser().authenticatedUserInfo(homepage.find("div", {"id": "userinfo"})) - return authuserinfo - - def getAuthenticatedUserId(self): - """ - Returns authenticated user's id - """ - return self.whatcd.authenticateduserinfo["id"] - - def getAuthenticatedUserAuthCode(self): - """ - Returns 
authenticated user's authcode - """ - return self.whatcd.authenticateduserinfo["authcode"] - - - def getAuthenticatedUserUpload(self): - """ - Returns authenticated user's total uploaded data - """ - return self.whatcd.authenticateduserinfo["uploaded"] - - - def getAuthenticatedUserDownload(self): - """ - Returns authenticated user's total downloaded data - """ - return self.whatcd.authenticateduserinfo["downloaded"] - - - def getAuthenticatedUserRatio(self): - """ - Returns authenticated user's ratio - """ - return self.whatcd.authenticateduserinfo["ratio"] - - def getAuthenticatedUserRequiredRatio(self): - """ - Returns authenticated user's required ratio - """ - return self.whatcd.authenticateduserinfo["required"] - - -class User(WhatBase): - """A What.CD user""" - - def __init__(self, username, whatcd): - """Create an user object. - # Parameters: - * username str: The user's name. - - whatcd object: the what.cd network object - """ - WhatBase.__init__(self, whatcd) - self.name = username - self.whatcd = whatcd - self.userpage = "/user.php?" 
- self.userid = None - self.userinfo = None - - def getUserName(self): - """ - Returns user's name - """ - return self.username - - def getUserId(self): - """ - Returns user's id, None if user doesn't exists - """ - if self.userid: - return self.userid - else: - idform = {'action': "search", 'search': self.name} - data = urllib.urlencode(idform) - headers = self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).headers - if dict(headers) is None: - return None - else: - self.userid = dict(headers)['location'][12:] - return self.userid - - def getInfo(self): - """ - Returns a dictionary of {percentile:{dataup str, - datadown str, - overall str, - postmade str, - boutyspent str, - reqfilled str, - artistsadded str, - torrentsup str}, - stats: {uploaded str, - ratio str, - joined str, - downloaded str, - lastseen str, - rratio str}, - community: {uploaded tuple(total str, url str), - forumposts tuple(total str, url str), - invited tuple (total,None), - perfectflacs tuple(total str, url str), - contributedcollages tuple(total str, url str), - reqvoted tuple(total str, url str), - uniquegroups tuple(total str, url str) - torrentscomments tuple(total str, url str), - snatched tuple(total str, url str), - artists str, - reqfilled tuple(total str, url str), - startedcollages tuple(total str, url str), - leeching tuple(total str, url str), - seeding tuple(total str, url str)} - } - If paranoia is not Off, it returns None. 
- """ - if self.getUserId(): - form = {'id': self.getUserId()} - data = urllib.urlencode(form) - userpage = BeautifulSoup(self._request("GET", self.userpage + data, "", self.whatcd.headers).execute(True).body) - info = self._parser().userInfo(userpage.find("div", {"class": "sidebar"}), self.name) - self.userinfo = info - return info - else: - print "no user id retrieved" - return None - - - def getTorrentsSeeding(self, page=1): - """ - Returns a list with all user's uploaded music torrents - in form of dictionary {page(tuple with current and total),tag, dlurl, id, - artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, - album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} - """ - if self.userid is None: - self.userid = self.getUserId() - url = "/torrents.php?type=seeding&userid=%s&page=%d" % (self.userid, page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().torrentsList(torrentspage) - - def getTorrentsSnatched(self, page=1): - """ - Returns a list with all user's uploaded music torrents - in form of dictionary {page(tuple with current and total),tag, dlurl, id, - artist(a tuple with 1 artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, - album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} - """ - if self.userid is None: - self.userid = self.getUserId() - url = "/torrents.php?type=snatched&userid=%s&page=%d" % (self.userid, page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().torrentsList(torrentspage) - - def getTorrentsUploaded(self, page=1): - """ - Returns a list with all user's uploaded music torrents - in form of dictionary {page(tuple with current and total),tag, dlurl, id, - artist(a tuple with 1 
artist name || 2 names in case of two artists || 'Various Artists' if V.A.}, - album, release type, scene, year and artistid (a tuple with 1 artist id || 2 ids if 2 artists torrent || empty if V.A.} - """ - if self.userid is None: - self.userid = self.getUserId() - url = "/torrents.php?type=uploaded&userid=%s&page=%d" % (self.userid, page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().torrentsList(torrentspage) - - - def getTorrentsCommented(self, page=1): - """ - Returns a list with all user's commented torrents - in form of dictionary {postid, torrentid, comment,postdate, pages} - - """ - if self.userid is None: - self.userid = self.getUserId() - - url = "/%s&page=%d" % (self.specificUserInfo().torrentscomments[1], page) - torrentspage = BeautifulSoup(self._request("GET", url, "", self.whatcd.headers).execute(True).body) - return self._parser().postsList(torrentspage) - - - - ############################################### - # specific values # - ############################################### - - - def specificUserInfo(self): - """ - Returns specific attributes of user info. 
None if user's paranoia is on - """ - info = SpecificInformation() - # Initialize attributes - info.joindate, info.lastseen, info.dataup, info.datadown,\ - info.ratio, info.rratio, info.uppercentile, info.downpercentile,\ - info.torrentsuppercentile, info.reqfilledpercentile, info.bountyspentpercentile,\ - info.postsmadepercentile, info.artistsaddedpercentile, info.overallpercentile,\ - info.postsmadecom, info.torrentscommentscom, info.collagesstartedcom, info.collagescontrcon,\ - info.reqfilledcom, info.reqvotedcom, info.uploadedcom, info.uniquecom, info.perfectcom,\ - info.seedingcom, info.leechingcom, info.snatchedcom, info.invitedcom, info.artistsaddedcom\ - = (None, None, None, None, None, None, None, None, None, None, None, None, None, None,\ - None, None, None, None, None, None, None, None, None, None, None, None, None, None) - - - if not self.userinfo and self.getInfo() is None: - pass - else: - ######## stats ########### - info.joindate = self.userinfo['stats']['joined'] - info.lastseen = self.userinfo['stats']['lastseen'] - info.dataup = self.userinfo['stats']['uploaded'] - info.datadown = self.userinfo['stats']['downloaded'] - info.ratio = self.userinfo['stats']['ratio'] - info.rratio = self.userinfo['stats']['rratio'] - ######## percentile ########### - info.uppercentile = self.userinfo['percentile']['dataup'] - info.downpercentile = self.userinfo['percentile']['datadown'] - info.torrentsuppercentile = self.userinfo['percentile']['torrentsup'] - info.reqfilledpercentile = self.userinfo['percentile']['reqfilled'] - info.bountyspentpercentile = self.userinfo['percentile']['bountyspent'] - info.postsmadepercentile = self.userinfo['percentile']['postsmade'] - info.artistsaddedpercentile = self.userinfo['percentile']['artistsadded'] - info.overallpercentile = self.userinfo['percentile']['overall'] - ######## community ########### - info.postsmadecom = self.userinfo['community']['forumposts'] - info.torrentscomments = 
self.userinfo['community']['torrentscomments'] - info.collagesstartedcom = self.userinfo['community']['startedcollages'] - info.collagescontrcon = self.userinfo['community']['contributedcollages'] - info.reqfilledcom = self.userinfo['community']['reqfilled'] - info.reqvotedcom = self.userinfo['community']['reqvoted'] - info.uploadedcom = self.userinfo['community']['uploaded'] - info.uniquecom = self.userinfo['community']['uniquegroups'] - info.perfectcom = self.userinfo['community']['pefectflacs'] - info.seedingcom = self.userinfo['community']['seeding'] - info.leechingcom = self.userinfo['community']['leeching'] - info.snatchedcom = self.userinfo['community']['snatched'] - info.invitedcom = self.userinfo['community']['invited'][0] - info.artistsaddedcom = self.userinfo['community']['artists'] - - - - return info - - -class Torrent(WhatBase): - """A What.CD torrent""" - - def __init__(self, id, page, isparent, whatcd): - """Create a torrent object. - # Parameters: - * id str: The torrent's id. - * whatcd object: the WhatCD network object - * page: The torrent page's number [optional] - """ - WhatBase.__init__(self, whatcd) - self.id = id - self.page = page - self.whatcd = whatcd - self.isParent = isparent - self.torrentpage = "/torrents.php?" 
- self.torrentinfo = self.getInfo() - - - def getTorrentUrl(self): - """ - Returns torrent's URL - """ - if self.isParent: - form = {'id': self.id, 'page':self.page} - data = urllib.urlencode(form) - return self.torrentpage + data - else: - form = {'torrentid': self.id, 'page':self.page} - data = urllib.urlencode(form) - headers = self._request("GET", self.torrentpage + data, "", self.whatcd.headers).execute(True).headers - - if dict(headers) is None: - return None - else: - if 'location' not in dict(headers).keys(): - return None - else: - return dict(headers)['location'] - - - def getInfo(self): - """ - Returns a dictionnary with torrents's info - """ - if self.getTorrentUrl() is None: - print "no torrent retrieved with such id" - return None - - torrentpage = BeautifulSoup(self._request("GET", "/" + self.getTorrentUrl(), "", self.whatcd.headers).execute(True).body) - - if 'Site log' in torrentpage.find("title").string: - print "no torrent retrieved with such id" - return None - else: - return self._parser().torrentInfo(torrentpage, self.id, self.isParent) - - - def getTorrentParentId(self): - """ - Returns torrent's group id - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['parentid'] - - def getTorrentChildren(self): - """ - Returns list of children if is a torrent group, else returns own id in list - """ - if self.isParent: - return self.torrentinfo['torrent']['childrenids'] - else: - return [self.id] - - def getTorrentDownloadURL(self): - """ - Returns relative url to download the torrent - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['downloadurl'] - - def getTorrentDetails(self): - """ - Returns torrent's details (format / bitrate) - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['details'] - - def getTorrentEditionInfo(self): - """ - Returns torrent's edition info (Edition information / media type) - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['editioninfo'] - - def 
getTorrentMediaType(self): - """ - Returns torrent's media type - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['rlsmedia'] - - def getTorrentSize(self): - """ - Returns torrent's size - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['size'] - - - def getTorrentSnatched(self): - """ - Returns torrent's total snatches - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['snatched'] - - - def getTorrentSeeders(self): - """ - Returns torrent's current seeders - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['seeders'] - - def getTorrentLeechers(self): - """ - Returns torrent's current leechers - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['leechers'] - - def getTorrentUploadedBy(self): - """ - Returns torrent's uploader - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['uploadedby'] - - def getTorrentFolderName(self): - """ - Returns torrent's folder name - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['foldername'] - - def getTorrentFileList(self): - """ - Returns torrent's file list - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['filelist'] - - - def getTorrentReleaseType(self): - """ - Returns torrent's release type - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['rlstype'] - - def getTorrentDescription(self): - """ - Returns torrent's description / empty string is there's none - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['torrentdescription'] - - def getTorrentComments(self): - """ - Returns a list of dictionnaries with each comment in the torrent page - {postid,post,userid,username} - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['comments'] - - def getTorrentCommentsPagesNumber(self): - """ - Returns number of pages of comments in the torrent - """ - if self.torrentinfo: - return self.torrentInfo['torrent']['commentspages'] - - def isTorrentFreeLeech(self): - """ - 
Returns True if torrent is freeleeech, False if not - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['isfreeleech'] - - def isTorrentReported(self): - """ - Returns True if torrent is reported, False if not - """ - if self.torrentinfo: - return self.torrentinfo['torrent']['isreported'] - - -class Artist(WhatBase): - """A What.CD artist""" - - def __init__(self, name, whatcd): - """Create an artist object. - # Parameters: - * name str: The artist's name. - * whatcd object: The WhatCD network object - """ - WhatBase.__init__(self, whatcd) - self.name = name - self.whatcd = whatcd - self.artistpage = "/artist.php" - self.utils = Utils() - self.info = self.getInfo() - - - def getArtistName(self): - """ - Returns artist's name - """ - return self.name - - def getArtistId(self): - """ - Returns artist's id, None if artist's not found - """ - form = {'artistname': self.name} - data = urllib.urlencode(form) - headers = self._request("GET", self.artistpage + "?" + data, "", self.whatcd.headers).execute(True).headers - if dict(headers)['location'][0:14] != 'artist.php?id=': - return None - else: - return dict(headers)['location'][14:] - - def getInfo(self): - """ - Returns artist's info, None if there isn't - """ - if self.getArtistId(): - form = {'id': self.getArtistId()} - data = urllib.urlencode(form) - artistpage = BeautifulSoup(self._request("GET", self.artistpage + "?" 
+ data, "", self.whatcd.headers).execute(True).body) - return self._parser().artistInfo(artistpage) - else: - print "no artist info retrieved" - return None - - def getArtistReleases(self): - """ - Returns a list with all artist's releases in form of dictionary {releasetype, year, name, id} - """ - return self.info['releases'] - - def getArtistImage(self): - """ - Return the artist image URL, None if there's no image - """ - return self.info['image'] - - def getArtistInfo(self): - """ - Return the artist's info, blank string if none - """ - return self.info['info'] - - def getArtistTags(self): - """ - Return a list with artist's tags - """ - return self.info['tags'] - - def getArtistSimilar(self): - """ - Return a list with artist's similar artists - """ - return self.info['similarartists'] - - def getArtistRequests(self): - """ - Returns a list with all artist's requests in form of dictionary {requestname, id} - """ - return self.info['requests'] - - def setArtistInfo(self, id, info): - """ - Updates what.cd artist's info and image - Returns 1 if artist info updated succesfully, 0 if not. - # Parameters: - * id str: what.cd artist's id - * info tuple: (The artist's info -str-, image url -str- (None if there isn't)) - """ - if info[0]: - params = {'action': 'edit', 'artistid':id} - data = urllib.urlencode(params) - - edit_page = BeautifulSoup(self._request("GET", self.artistpage + "?" 
+ data, "", self.whatcd.headers).execute(True).body) - what_form = self._parser().whatForm(edit_page, 'edit') - if info[1]: - image_to_post = info[1] - else: - image_to_post = what_form['image'] - data_to_post = {'body': info[0].encode('utf-8'), - 'summary':'automated artist info insertion',\ - 'image':image_to_post,\ - 'artistid':what_form['artistid'],\ - 'auth':what_form['auth'],\ - 'action':what_form['action']} - - #post artist's info - self.whatcd.headers['Content-type'] = "application/x-www-form-urlencoded" - response = self._request("POST", self.artistpage, urllib.urlencode(data_to_post), self.whatcd.headers).execute(False) - artist_id_returned = dict(response.headers)['location'][14:] - - if str(artist_id_returned) == str(what_form['artistid']): - return 1 - else: - return 0 - - else: - return 'no artist info provided. Aborting.' - exit() - - -class Parser(object): - - def __init__(self, whatcd): - self.utils = Utils() - self.whatcd = whatcd - self.totalpages = 0 - - def authenticatedUserInfo(self, dom): - """ - Parse the index page and returns a dictionnary with basic authenticated user information - """ - userInfo = {} - soup = BeautifulSoup(str(dom)) - for ul in soup.fetch('ul'): - ul_all_li = ul.findAll('li') - if ul["id"] == "userinfo_username": - #retrieve user logged id - hrefid = ul_all_li[0].find("a")["href"] - regid = re.compile('[0-9]+') - if regid.search(hrefid) is None: - self.debugMessage("not found href to retrieve user id") - else: - userInfo["id"] = regid.search(hrefid).group(0) - - #retrieve user logged id - hrefauth = ul_all_li[2].find("a")["href"] - regauth = re.compile('=[0-9a-zA-Z]+') - if regid.search(hrefid) is None: - self.debugMessage("not found href to retrieve user id") - else: - userInfo["authcode"] = regauth.search(hrefauth).group(0)[1:] - - elif ul["id"] == "userinfo_stats": - if len(ul_all_li) > 0: - userInfo["uploaded"] = ul_all_li[0].find("span").string - userInfo["downloaded"] = ul_all_li[1].find("span").string - 
userInfo["ratio"] = ul_all_li[2].findAll("span")[1].string - userInfo["required"] = ul_all_li[3].find("span").string - userInfo["authenticate"] = True - - return userInfo - - def userInfo(self, dom, user): - """ - Parse an user's page and returns a dictionnary with its information - - # Parameters: - * dom str: user page html - * user str: what.cd username - """ - userInfo = {'stats':{}, 'percentile':{}, 'community':{}} - soup = BeautifulSoup(str(dom)) - - for div in soup.fetch('div', {'class':'box'}): - - #if paronoia is not set to 'Off', stop collecting data - if div.findAll('div')[0].string == "Personal": - if div.find('ul').findAll('li')[1].contents[1].string.strip() != "Off": - return None - - all_div_box = soup.findAll('div', {'class': 'box'}) - statscontainer = all_div_box[1] - percentilecontainer = all_div_box[2] - communitycontainer = all_div_box[4] - - statscontainer_all_li = statscontainer.findAll('li') - userInfo['stats']['joined'] = statscontainer_all_li[0].find('span')['title'] - userInfo['stats']['lastseen'] = statscontainer_all_li[1].find('span')['title'] - userInfo['stats']['uploaded'] = statscontainer_all_li[2].string[10:] - userInfo['stats']['downloaded'] = statscontainer_all_li[3].string[12:] - userInfo['stats']['ratio'] = statscontainer_all_li[4].find('span').string - userInfo['stats']['rratio'] = statscontainer_all_li[5].string[16:] - -# percentilecontainer_all_li = percentilecontainer.findAll('li') -# userInfo['percentile']['dataup'] = percentilecontainer_all_li[0].string[15:] -# userInfo['percentile']['datadown'] = percentilecontainer_all_li[1].string[17:] -# userInfo['percentile']['torrentsup'] = percentilecontainer_all_li[2].string[19:] -# userInfo['percentile']['reqfilled'] = percentilecontainer_all_li[3].string[17:] -# userInfo['percentile']['bountyspent'] = percentilecontainer_all_li[4].string[14:] -# userInfo['percentile']['postsmade'] = percentilecontainer_all_li[5].string[12:] -# userInfo['percentile']['artistsadded'] = 
percentilecontainer_all_li[6].string[15:] -# userInfo['percentile']['overall'] = percentilecontainer_all_li[7].find('strong').string[14:] - -# communitycontainer_all_li = communitycontainer.findAll('li') -# userInfo['community']['forumposts'] = (communitycontainer_all_li[0].contents[0].string[13:len(communitycontainer_all_li[0].contents[0].string)-2],\ -# communitycontainer_all_li[0].find('a')['href']) -# userInfo['community']['torrentscomments'] = (communitycontainer_all_li[1].contents[0].string[18:len(communitycontainer_all_li[1].contents[0].string)-2],\ -# communitycontainer_all_li[1].find('a')['href']) -# userInfo['community']['startedcollages'] = (communitycontainer_all_li[2].contents[0].string[18:len(communitycontainer_all_li[2].contents[0].string)-2],\ -# communitycontainer_all_li[2].find('a')['href']) -# userInfo['community']['contributedcollages'] = (communitycontainer_all_li[3].contents[0].string[25:len(communitycontainer_all_li[3].contents[0].string)-2],\ -# communitycontainer_all_li[3].find('a')['href']) -# userInfo['community']['reqfilled'] = (communitycontainer_all_li[4].contents[0].string[17:len(communitycontainer_all_li[4].contents[0].string)-2],\ -# communitycontainer_all_li[4].find('a')['href']) -# userInfo['community']['reqvoted'] = (communitycontainer_all_li[5].contents[0].string[16:len(communitycontainer_all_li[5].contents[0].string)-2],\ -# communitycontainer_all_li[5].find('a')['href']) -# userInfo['community']['uploaded'] = (communitycontainer_all_li[6].contents[0].string[10:len(communitycontainer_all_li[6].contents[0].string)-2],\ -# communitycontainer_all_li[6].find('a')['href']) -# userInfo['community']['uniquegroups'] = (communitycontainer_all_li[7].contents[0].string[15:len(communitycontainer_all_li[7].contents[0].string)-2],\ -# communitycontainer_all_li[7].find('a')['href']) -# userInfo['community']['pefectflacs'] = (communitycontainer_all_li[8].contents[0].string[16:len(communitycontainer_all_li[8].contents[0].string)-2],\ -# 
communitycontainer_all_li[8].find('a')['href']) -# userInfo['community']['seeding'] = (communitycontainer_all_li[9].contents[0].string[9:len(communitycontainer_all_li[9].contents[0].string)-2],\ -# communitycontainer_all_li[9].find('a')['href']) -# userInfo['community']['leeching'] = (communitycontainer_all_li[10].contents[0].string[10:len(communitycontainer_all_li[10].contents[0].string)-2],\ -# communitycontainer_all_li[10].find('a')['href']) -# #NB: there's a carriage return and white spaces inside the snatched li tag -# userInfo['community']['snatched'] = (communitycontainer_all_li[11].contents[0].string[10:len(communitycontainer_all_li[11].contents[0].string)-7],\ -# communitycontainer_all_li[11].find('a')['href']) -# userInfo['community']['invited'] = (communitycontainer_all_li[12].contents[0].string[9:],\ -# None) -# userInfo['community']['artists'] = percentilecontainer_all_li[6]['title'] - - return userInfo - - def torrentInfo(self, dom, id, isparent): - """ - Parse a torrent's page and returns a dictionnary with its information - """ - - torrentInfo = {'torrent':{}} - torrentfiles = [] - torrentdescription = "" - isreported = False - isfreeleech = False - soup = BeautifulSoup(str(dom)) - if isparent: - torrentInfo['torrent']['parentid'] = id - torrentInfo['torrent']['childrenids'] = [] - for torrent in soup.findAll('tr', {'class':re.compile(r'\bgroupid_%s.+edition_\d.+group_torrent' % id)}): - child_id = re.search('\d+$', torrent['id']).group(0) - if child_id: - torrentInfo['torrent']['childrenids'].append(child_id) - else: - groupidurl = soup.findAll('div', {'class':'linkbox'})[0].find('a')['href'] - torrentInfo['torrent']['editioninfo'] = soup.findAll('td', {'class':'edition_info'})[0].find('strong').contents[-1] - regrlsmedia = re.compile('CD|DVD|Vinyl|Soundboard|SACD|Cassette|WEB|Blu-ray') - torrentInfo['torrent']['rlsmedia'] = regrlsmedia.search(torrentInfo['torrent']['editioninfo']).group(0) - torrentInfo['torrent']['parentid'] = 
groupidurl[groupidurl.rfind("=") + 1:] - - all_tr_id_torrent = soup.findAll('tr', {'id': 'torrent%s' % id}) - all_torrent_a = all_tr_id_torrent[0].findAll('a') - - torrentInfo['torrent']['downloadurl'] = all_tr_id_torrent[0].findAll('a', {'title':'Download'})[0]['href'] - ## is freeleech or/and reported? ## - #both - if len(all_torrent_a[-1].contents) == 4: - isreported = True - isfreeleech = True - torrentInfo['torrent']['details'] = all_torrent_a[-1].contents[0] - #either - elif len(all_torrent_a[-1].contents) == 2: - if all_torrent_a[-1].contents[1].string == 'Reported': - isreported = True - elif all_torrent_a[-1].contents[1].string == 'Freeleech!': - isreported = True - torrentInfo['torrent']['details'] = all_torrent_a[-1].contents[0] - #none - else: - torrentInfo['torrent']['details'] = all_torrent_a[-1].contents[0] - torrentInfo['torrent']['isfreeleech'] = isfreeleech - torrentInfo['torrent']['isreported'] = isreported - - all_torrent_td = all_tr_id_torrent[0].findAll('td') - torrentInfo['torrent']['size'] = all_torrent_td[1].string - torrentInfo['torrent']['snatched'] = all_torrent_td[2].string - torrentInfo['torrent']['seeders'] = all_torrent_td[3].string - torrentInfo['torrent']['leechers'] = all_torrent_td[4].string - - all_tr_id_torrent_underscore = soup.findAll('tr', {'id': 'torrent_%s' % id}) - torrentInfo['torrent']['uploadedby'] = all_tr_id_torrent_underscore[0].findAll('a')[0].string - foldername = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('div')[1].string - if(foldername is None): - torrentInfo['torrent']['foldername'] = None - else: - torrentInfo['torrent']['foldername'] = self.utils.decodeHTMLEntities(foldername) - files = soup.findAll('div', {'id':'files_%s' % id})[0].findAll('tr') - for file in files[1:-1]: - torrentfiles.append(self.utils.decodeHTMLEntities(file.contents[0].string)) - torrentInfo['torrent']['filelist'] = torrentfiles - #is there any description? 
-# all_torrent_blockquote = all_tr_id_torrent_underscore[0].findAll('blockquote') -# if len(all_torrent_blockquote) > 1: -# description = torrentInfo['torrent']['description'] = all_torrent_blockquote[1].contents -# info = '' -# for content in description: -# if content.string: -# info = "%s%s" % (info, self.utils._string(content.string)) -# torrentdescription = "%s%s" % (torrentdescription, self.utils._string(content.string)) -# torrentInfo['torrent']['torrentdescription'] = torrentdescription - regrlstype = re.compile('Album|Soundtrack|EP|Anthology|Compilation|DJ Mix|Single|Live album|Remix|Bootleg|Interview|Mixtape|Unknown') - torrentInfo['torrent']['rlstype'] = regrlstype.search(soup.find('div', {'class':'thin'}).find('h2').contents[1]).group(0) - - torrentInfo['torrent']['comments'] = [] - torrentInfo['torrent']['commentspages'] = 0 - - # if len(soup.findAll('table', {'class':'forum_post box vertical_margin'})) > 0: - # linkbox = dom.findAll("div", {"class": "linkbox"})[-1] - # pages = 1 - # postid = '' - # userid = '' - # post = '' - # # if there's more than 1 page of torrents - # linkbox_all_a = linkbox.findAll("a") - # if len(linkbox_all_a): - # # by default torrent page show last page of comments - # lastpage = linkbox_all_a[-1]['href'] - # pages = int(lastpage[18:lastpage.find('&')]) + 1 - # for comment in soup.findAll('table', {'class':'forum_post box vertical_margin'}): - # postid = comment.find("a", {"class":"post_id"}).string[1:] - # - # all_comment_a = comment.findAll("a") - # userid = all_comment_a[1]['href'][12:] - # username = all_comment_a[1].string - # post = comment.find("div", {"id":"content" + postid}) - # post = u''.join([post.string for post in post.findAll(text=True)]) - # torrentInfo['torrent']['comments'].append({"postid":postid, "post":post, "userid":userid, "username":username}) - # - # torrentInfo['torrent']['commentspages'] = pages - - return torrentInfo - - def artistInfo(self, dom): - """ - Parse an artist's page and returns a 
dictionnary with its information - """ - artistInfo = {} - releases = [] - requests = [] - infoartist = "" - tagsartist = [] - similarartists = [] - soup = BeautifulSoup(str(dom)) - for releasetype in soup.fetch('table', {'class': re.compile(r'\btorrent_table\b')}): - releasetypenames = releasetype.findAll('strong') - releasetypename = releasetypenames[0].string - for release in releasetypenames[1:-1]: - #skip release edition info and Freeleech! s - if len(release.parent.contents) > 1 and len(release.contents) > 1: - releaseyear = release.contents[0][0:4] - releasename = release.contents[1].string - releasehref = release.contents[1]['href'] - releaseid = releasehref[releasehref.rfind('=') + 1:] - releases.append({'releasetype':releasetypename,\ - 'year': releaseyear, 'name':self.utils.decodeHTMLEntities(releasename), 'id':releaseid}) - - artistInfo['releases'] = releases - - # This artist stuff wastes 10 secs - - #is there an artist image? -# artistInfo['image'] = None -# div_box = soup.find('div', {'class': 'box'}) -# if div_box.find('img'): -# artistInfo['image'] = div_box.find('img')['src'] -# #is there any artist info? -# contents = soup.find('div', {'class':'body'}).contents -# if len(contents) > 0: -# for content in contents: -# if content.string: -# infoartist = "%s%s" % (infoartist, self.utils._string(content.string)) -# artistInfo['info'] = self.utils.decodeHTMLEntities(infoartist) - #is there any artist tags? -# all_ul_class_stats_nobullet = soup.findAll('ul', {'class': 'stats nobullet'}) -# all_ul_class_stats_nobullet_li = all_ul_class_stats_nobullet[0].findAll('li') -# if all_ul_class_stats_nobullet_li: -# ul = all_ul_class_stats_nobullet_li -# for li in ul: -# if li.contents[0].string: -# tagsartist.append(self.utils._string(li.contents[0].string)) -# artistInfo['tags'] = tagsartist - #is there any similar artist? 
-# if all_ul_class_stats_nobullet[2].findAll('span', {'title':'2'}): -# artists = all_ul_class_stats_nobullet[2].findAll('span', {'title':'2'}) -# for artist in artists: -# if artist.contents[0].string: -# similarartists.append(self.utils._string(artist.contents[0].string)) -# artistInfo['similarartists'] = similarartists - #is there any request? -# table_requests = soup.find('table', {'id': 'requests'}) -# if table_requests: -# for request in table_requests.findAll('tr', {'class':re.compile('row')}): -# request_all_a_1 = request.findAll('a')[1] -# requests.append({'requestname': request_all_a_1.string, 'id': request_all_a_1['href'][28:]}) -# -# artistInfo['requests'] = requests - - return artistInfo - - def torrentsList(self, dom): - """ - Parse a torrent's list page and returns a dictionnary with its information - """ - torrentslist = [] - torrentssoup = dom.find("table", {"width": "100%"}) - pages = 0 - - #if there's at least 1 torrent in the list - if torrentssoup: - navsoup = dom.find("div", {"class": "linkbox"}) - pages = 1 - regyear = re.compile('\[\d{4}\]') - - #is there a page navigation bar? 
- if navsoup.contents: - #if there's more than 1 page of torrents - if navsoup.contents[-1].has_key('href'): - lastpage = navsoup.contents[-1]['href'] - pages = lastpage[18:lastpage.find('&')] - self.totalpages = pages - else: #we are at the last page, no href - pages = self.totalpages + 1 - #fetch all tr except first one (column head) - for torrent in torrentssoup.fetch('tr')[1:]: - #exclude non music torrents - if torrent.find('td').find('div')['class'][0:10] == 'cats_music': - - torrenttag = torrent.find('td').contents[1]['title'] - all_td_1_span_a = torrent.findAll('td')[1].find('span').findAll('a') - torrentdl = all_td_1_span_a[0]['href'] - torrentrm = all_td_1_span_a[1]['href'] - torrentid = torrentrm[torrentrm.rfind('=') + 1:] - torrenttd = torrent.findAll('td')[1] - - # remove dataless elements - torrenttags = torrenttd.div - rightlinks = torrenttd.span - torrenttags.extract() - rightlinks.extract() - - # remove line breaks - torrenttd = "".join([line.strip() for line in str(torrenttd).split("\n")]) - torrenttd = BeautifulSoup(torrenttd) - isScene = False - info = "" - - torrenttd_find_a = torrenttd.find("a") - torrenttd_all_a = torrenttd.findAll("a") - if len(torrenttd_all_a) == 2: - #one artist - torrentartist = (self.utils.decodeHTMLEntities(torrenttd_find_a.string), ) - artistid = (torrenttd_find_a['href'][14:], ) - torrentalbum = torrenttd_all_a[1].string - info = torrenttd_all_a[1].nextSibling.string.strip() - - - elif len(torrenttd_all_a) == 1: - #various artists - torrentartist = ('Various Artists', ) - artistid = () - torrentalbum = torrenttd_find_a.string - info = torrenttd_find_a.nextSibling.string.strip() - - elif len(torrenttd_all_a) == 3: - #two artists - torrentartist = (self.utils.decodeHTMLEntities(torrenttd_all_a[0].string),\ - self.utils.decodeHTMLEntities(torrenttd_all_a[1].string)) - artistid = (torrenttd_all_a[0]['href'][14:],\ - torrenttd_all_a[1]['href'][14:]) - torrentalbum = torrenttd_all_a[2].string - info = 
torrenttd_all_a[2].nextSibling.string.strip() - - elif torrenttd.find(text=re.compile('performed by')): - #performed by - torrentartist = (self.utils.decodeHTMLEntities(torrenttd_all_a[-2].string), ) - artistid = (torrenttd_all_a[-2]['href'][14:], ) - torrentalbum = torrenttd_all_a[-1].string - info = torrenttd_all_a[-1].nextSibling.string.strip() - - if 'Scene' in info: - isScene = True - - torrentyear = regyear.search(info).group(0)[1:5] - torrentslist.append({'tag':torrenttag,\ - 'dlurl':torrentdl,\ - 'id':torrentid,\ - 'artist':torrentartist,\ - 'artistid':artistid,\ - 'album':self.utils.decodeHTMLEntities(torrentalbum), - 'year':torrentyear, - 'pages':pages, - 'scene':isScene}) - - return torrentslist - - def postsList(self, dom): - """ - Parse a post list page and returns a dictionnary with each post information: - {torrentid, commentid, postid} - """ - postslist = [] - postssoup = dom.find("div", {"class": "thin"}) - pages = 0 - - #if there's at least 1 post in the list - if postssoup: - navsoup = dom.find("div", {"class": "linkbox"}) - - #if there's more than 1 page of torrents - if navsoup.find("a"): - lastpage = navsoup.findAll("a")[1]['href'] - pages = lastpage[18:lastpage.find('&')] - self.totalpages = pages - else: #we are at the last page, no link - pages = 1 - - for post in postssoup.fetch('table', {'class':'forum_post box vertical_margin'}): - commentbody = post.find("td", {"class":"body"}) - postid = post.find("span").findAll("a")[0].string[1:] - torrentid = post.find("span").findAll("a")[-1]['href'][post.find("span").findAll("a")[-1]['href'].rfind('=') + 1:] - comment = u''.join([commentbody.string for commentbody in commentbody.findAll(text=True)]) - postdate = post.find("span", {"class":"time"})['title'] - postslist.append({'postid':postid,\ - 'torrentid':torrentid,\ - 'comment':comment,\ - 'postdate':postdate,\ - 'pages':pages}) - - - return postslist - - - def whatForm(self, dom, action): - """ - Parse a what.cd edit page and returns a dict 
with all form inputs/textareas names and values - # Parameters: - * dom str: the edit page dom. - + action str: the action value from the requested form - """ - inputs = {} - - form = dom.find('input', {'name':'action', 'value':action}).parent - elements = form.fetch(('input', 'textarea')) - #get all form elements except for submit input - for element in elements[0:3]: - name = element.get('name', None) - if element.name == 'textarea': - inputs[name] = element.string - else: - inputs[name] = element.get('value', None) - return inputs - - - -if __name__ == "__main__": - print "Module to manage what.cd as a web service"