Update headphones/searcher.py

2026-07-20 16:03:59 +01:00 · 2011-11-04 17:46:50 +13:00
parent 7b01d5e747
commit 22db06ed1e
1 changed file with 360 additions and 1 deletion
@@ -49,6 +49,26 @@ def getNewzbinURL(url):
    f.close()

    return data
+    
+def url_fix(s, charset='utf-8'):
+    """Return URL ``s`` with its path and query string percent-encoded.
+
+    ``s`` may be a byte string or a unicode string; unicode input is first
+    encoded with ``charset`` (characters that cannot be encoded are dropped).
+    The scheme, network location and fragment are passed through untouched.
+    """
+    raw = s.encode(charset, 'ignore') if isinstance(s, unicode) else s
+    parts = urlparse.urlsplit(raw)
+    # parts is (scheme, netloc, path, query, fragment)
+    quoted_path = urllib.quote(parts[2], '/%')
+    quoted_query = urllib.quote_plus(parts[3], ':&=')
+    return urlparse.urlunsplit((parts[0], parts[1], quoted_path, quoted_query, parts[4]))
+    
+    
+def searchforalbum(albumid=None, new=False):
+    """Search for an album via NZB first, falling back to torrents.
+
+    searchNZB() is only called when at least one NZB provider (NZBMATRIX,
+    NEWZNAB, NZBSORG or NEWZBIN) and one NZB destination (SAB_HOST or
+    BLACKHOLE) are configured. searchTorrent() is called whenever the NZB
+    outcome is the string "none", which is also the value used when the NZB
+    search was not run at all.
+    """
+    have_provider = bool(headphones.NZBMATRIX or headphones.NEWZNAB or headphones.NZBSORG or headphones.NEWZBIN)
+    nzb_enabled = have_provider and bool(headphones.SAB_HOST or headphones.BLACKHOLE)
+
+    nzb_outcome = searchNZB(albumid, new) if nzb_enabled else "none"
+
+    if nzb_outcome == "none":
+        searchTorrent(albumid, new)
+        
+        
+        

 def searchNZB(albumid=None, new=False):

@@ -513,4 +533,343 @@ def preprocess(resultlist):
            return nzb, result
        else:
            logger.error("Couldn't retrieve the best nzb. Skipping.")
-    return (False, False)
+    return (False, False)
+
+
+
+def _torrent_quality_limits():
+    """Return the (format code, maximum size in bytes) pair for PREFERRED_QUALITY."""
+    if headphones.PREFERRED_QUALITY == 3:
+        return "2", 10000000000     # flac
+    if headphones.PREFERRED_QUALITY:
+        return "10", 10000000000    # mp3+flac
+    return "8", 300000000           # mp3
+
+
+def _seeds_from_description(description, marker):
+    """Return the integer that directly follows ``marker`` in ``description`` (0 if absent)."""
+    match = re.search(re.escape(marker) + r'(\d+)', description)
+    if match is None:
+        return 0
+    return int(match.group(1))
+
+
+def _parse_kat_item(item):
+    """Return (title, seeders) for a Kick Ass Torrent feed entry."""
+    return item.title, item.seeds
+
+
+def _parse_isohunt_item(item):
+    """Return (title, seeders) for an ISOhunt feed entry, stripping the bracketed title suffix."""
+    title = re.sub(r"(?<=  \[)(.+)(?=\])","",item.title)
+    title = title.replace("[]","")
+    return title, _seeds_from_description(item.description, "Seeds: ")
+
+
+def _parse_mininova_item(item):
+    """Return (title, seeders) for a Mininova feed entry."""
+    return item.title, _seeds_from_description(item.description, "Ratio: ")
+
+
+def _torrent_has_wrong_format(url, fmt):
+    """Return True when FLAC is required but the torrent at ``url`` looks mp3-only or is unreadable."""
+    if fmt != "2":
+        return False
+    try:
+        torrent = urllib2.urlopen(url, timeout=30).read()
+    except Exception as e:
+        # Best effort: a torrent we cannot inspect is treated as the wrong format.
+        logger.debug('Could not inspect %s: %s' % (url, e))
+        return True
+    return torrent.find(".mp3") > 0 and torrent.find(".flac") < 1
+
+
+def _fetch_torrent_entries(provider, searchURL, term):
+    """Download and parse a provider RSS feed, returning its entries (possibly empty)."""
+    try:
+        data = urllib2.urlopen(searchURL, timeout=20).read()
+    except urllib2.URLError as e:
+        logger.warn('Error fetching data from %s: %s' % (provider, e))
+        return []
+
+    if not data:
+        return []
+
+    entries = feedparser.parse(data).entries
+    if not len(entries):
+        logger.info(u"No results found from %s for %s" % (provider, term))
+    return entries
+
+
+def _search_torrent_provider(provider, searchURL, term, parse_item, minimumseeders):
+    """Return the (title, size, url, provider) tuples from one provider that pass the filters."""
+    fmt, maxsize = _torrent_quality_limits()
+    found = []
+
+    for item in _fetch_torrent_entries(provider, searchURL, term):
+        try:
+            title, seeders = parse_item(item)
+            url = item.links[1]['url']
+            size = int(item.links[1]['length'])
+
+            # Cheap checks first so the .torrent is only downloaded for
+            # candidates that already pass the size and seeder limits.
+            if size < maxsize and minimumseeders < int(seeders) and not _torrent_has_wrong_format(url, fmt):
+                found.append((title, size, url, provider))
+                logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
+            else:
+                logger.info('%s is larger than the maxsize, the wrong format or has to little seeders for this category, skipping. (Size: %i bytes)' % (title, size))
+
+        except Exception as e:
+            logger.error(u"An unknown error occured trying to parse the feed: %s" % e)
+
+    return found
+
+
+def searchTorrent(albumid=None, new=False):
+    """Search the enabled torrent providers for wanted albums and snatch the best result.
+
+    albumid -- restrict the search to this AlbumID; when falsy every album
+               with Status "Wanted" is searched and ``new`` is forced to True.
+    new     -- when true, results whose URL is already in the ``snatched``
+               table are skipped.
+
+    For each album a search term is built, the providers enabled through
+    headphones.KAT / ISOHUNT / MININOVA are queried, the hits are filtered
+    with verifyresult() and ordered, and preprocesstorrent() downloads the
+    chosen .torrent. The data is then handed to sab.sendTorrent() or written
+    into TORRENTBLACKHOLE_DIR, and the album is recorded as snatched.
+    An album with no usable result is skipped and the loop moves on to the
+    next wanted album. Returns None.
+    """
+    myDB = db.DBConnection()
+
+    if albumid:
+        results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" AND AlbumID=?', [albumid])
+    else:
+        results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted"')
+        new = True
+
+    for albums in results:
+
+        albumid = albums[2]
+        reldate = albums[3]
+
+        try:
+            year = reldate[:4]
+        except TypeError:
+            # ReleaseDate is not sliceable (e.g. None): search without a year
+            year = ''
+
+        dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':''}
+
+        cleanalbum = helpers.latinToAscii(helpers.replace_all(albums[1], dic))
+        cleanartist = helpers.latinToAscii(helpers.replace_all(albums[0], dic))
+
+        # FLAC usually doesn't have a year for some reason so I'll leave it out
+        # Various Artist albums might be listed as VA, so I'll leave that out too
+        # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums
+        if albums[0] in albums[1] or len(albums[0]) < 4 or len(albums[1]) < 4:
+            term = cleanartist + ' ' + cleanalbum + ' ' + year
+        elif albums[0] == 'Various Artists':
+            term = cleanalbum + ' ' + year
+        else:
+            term = cleanartist + ' ' + cleanalbum
+
+        # Replace bad characters in the term and encode it as UTF-8
+        term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
+        artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8')
+
+        logger.info("Searching for %s since it was marked as wanted" % term)
+
+        resultlist = []
+        minimumseeders = int(headphones.NUMBEROFSEEDERS) - 1
+
+        if headphones.KAT:
+            params = {
+                        "categories[0]": "music",
+                        "field": "seeders",
+                        "sorder": "desc",
+                        "rss": "1"
+                      }
+            searchURL = url_fix("http://www.kat.ph/search/" + term) + "/?%s" % urllib.urlencode(params)
+            resultlist.extend(_search_torrent_provider("Kick Ass Torrent", searchURL, term, _parse_kat_item, minimumseeders))
+
+        if headphones.ISOHUNT:
+            params = {
+                        "iht": "2",
+                        "sort": "seeds"
+                      }
+            searchURL = url_fix("http://isohunt.com/js/rss/" + term) + "?%s" % urllib.urlencode(params)
+            resultlist.extend(_search_torrent_provider("ISOhunt", searchURL, term, _parse_isohunt_item, minimumseeders))
+
+        if headphones.MININOVA:
+            searchURL = url_fix("http://www.mininova.org/rss/" + term + "/5")
+            resultlist.extend(_search_torrent_provider("Mininova", searchURL, term, _parse_mininova_item, minimumseeders))
+
+        #attempt to verify that this isn't a substring result
+        #when looking for "Foo - Foo" we don't want "Foobar"
+        #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist
+        resultlist = [result for result in resultlist if verifyresult(result[0], artistterm, term)]
+
+        if not len(resultlist):
+            continue
+
+        if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
+
+            logger.debug('Target bitrate: %s kbps' % headphones.PREFERRED_BITRATE)
+
+            tracks = myDB.select('SELECT TrackDuration from tracks WHERE AlbumID=?', [albumid])
+
+            try:
+                albumlength = sum([pair[0] for pair in tracks])
+
+                targetsize = albumlength/1000 * int(headphones.PREFERRED_BITRATE) * 128
+                logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize))
+
+                # Order by how close each result is to the target size
+                newlist = []
+                for result in resultlist:
+                    delta = abs(targetsize - result[1])
+                    newlist.append((result[0], result[1], result[2], delta))
+
+                torrentlist = sorted(newlist, key=lambda entry: entry[3])
+
+            except Exception as e:
+
+                logger.debug('Error: %s' % str(e))
+                logger.info('No track information for %s - %s. Defaulting to highest quality' % (albums[0], albums[1]))
+
+                torrentlist = sorted(resultlist, key=lambda entry: entry[1], reverse=True)
+
+        else:
+
+            torrentlist = sorted(resultlist, key=lambda entry: entry[1], reverse=True)
+
+        if new:
+
+            # Drop leading results whose URL has been snatched before
+            while len(torrentlist):
+                alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [torrentlist[0][2]])
+                if not len(alreadydownloaded):
+                    break
+                logger.info('%s has already been downloaded. Skipping.' % torrentlist[0][0])
+                torrentlist.pop(0)
+
+            if not len(torrentlist):
+                logger.info('No more results found for %s' % term)
+                # Move on to the next wanted album instead of aborting the whole search
+                continue
+
+        logger.info(u"Pre-processing result")
+
+        (data, bestqual) = preprocesstorrent(torrentlist)
+
+        if not (data and bestqual):
+            continue
+
+        logger.info(u'Found best result: <a href="%s">%s</a> - %s' % (bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])))
+        torrent_folder_name = '%s - %s [%s]' % (helpers.latinToAscii(albums[0]).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(albums[1]).encode('UTF-8').replace('/', '_'), year)
+
+        if headphones.TORRENTBLACKHOLE_DIR == "sendtracker":
+
+            torrent = classes.TorrentDataSearchResult()
+            torrent.extraInfo.append(data)
+            torrent.name = torrent_folder_name
+            sab.sendTorrent(torrent)
+
+        elif headphones.TORRENTBLACKHOLE_DIR:
+
+            torrent_name = torrent_folder_name + '.torrent'
+            download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
+            try:
+                # .torrent files are binary; 'wb' avoids newline translation
+                with open(download_path, 'wb') as f:
+                    f.write(data)
+                logger.info('File saved to: %s' % torrent_name)
+            except Exception as e:
+                logger.error('Couldn\'t write Torrent file: %s' % e)
+                break
+
+        else:
+            # Nothing was delivered, so do not record the album as snatched
+            logger.warn('No torrent destination configured. Not marking %s as snatched' % term)
+            continue
+
+        myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
+        myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)', [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name])
+
+
+
+
+def preprocesstorrent(resultlist):
+    """Download the .torrent file for the largest result in ``resultlist``.
+
+    Each result is a tuple whose index 1 is the size and index 2 the download
+    URL. Candidates are tried from largest to smallest (ties keep their
+    incoming order); a candidate whose download fails is logged and skipped.
+
+    Returns (torrent_data, result) for the first candidate that downloads,
+    or (False, False) when the list is empty or every download failed, so
+    the returned data always belongs to the returned result.
+    """
+    # sorted() is stable, so with reverse=True equal sizes keep their order
+    candidates = sorted(resultlist, key=lambda result: int(result[1]), reverse=True)
+
+    for result in candidates:
+        try:
+            torrent = urllib2.urlopen(result[2], timeout=30).read()
+        except (urllib2.URLError, IOError) as e:
+            logger.error('Unable to torrent file. Skipping.')
+            logger.debug('Error: %s' % str(e))
+            continue
+        return torrent, result
+
+    return (False, False)