From c95bb9d29c439da1ce27f2f2ed4997f418c74589 Mon Sep 17 00:00:00 2001
From: piejanssens <pieterjanssens@me.com>
Date: Wed, 6 Aug 2014 19:59:59 +0200
Subject: [PATCH] Check for album's with artist in title

Fixes #1788 and #1666
---
 headphones/searcher.py | 51 +++++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/headphones/searcher.py b/headphones/searcher.py
index 13f571ac..ef889d5c 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -29,7 +29,7 @@ import string
 import shutil
 import requests
 import subprocess
-
+import unicodedata
 
 import headphones
 from headphones.common import USER_AGENT
@@ -147,6 +147,11 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False):
     if data and bestqual:
         send_to_downloader(data, bestqual, album)
 
+def removeDisallowedFilenameChars(filename):
+    validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits)
+    cleanedFilename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').lower()
+    return ''.join(c for c in cleanedFilename if c in validFilenameChars)
+
 def more_filtering(results, album, albumlength, new):
 
     low_size_limit = None
@@ -174,33 +179,39 @@ def more_filtering(results, album, albumlength, new):
                 if headphones.PREFERRED_BITRATE_ALLOW_LOSSLESS:
                     allow_lossless = True
 
-    if low_size_limit or high_size_limit or new:
+    newlist = []
 
-        newlist = []
+    for result in results:
 
-        for result in results:
+        normalizedAlbumArtist = removeDisallowedFilenameChars(album['ArtistName'])
+        normalizedAlbumTitle = removeDisallowedFilenameChars(album['AlbumTitle'])
+        normalizedResultTitle = removeDisallowedFilenameChars(result[0]);
+        artistTitleCount = normalizedResultTitle.count(normalizedAlbumArtist)
 
-            if low_size_limit and (int(result[1]) < low_size_limit):
-                logger.info("%s from %s is too small for this album - not considering it. (Size: %s, Minsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(low_size_limit))
+        if normalizedAlbumArtist in normalizedAlbumTitle and artistTitleCount < 2:
+            continue
+
+        if low_size_limit and (int(result[1]) < low_size_limit):
+            logger.info("%s from %s is too small for this album - not considering it. (Size: %s, Minsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(low_size_limit))
+            continue
+
+        if high_size_limit and (int(result[1]) > high_size_limit):
+            logger.info("%s from %s is too large for this album - not considering it. (Size: %s, Maxsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(high_size_limit))
+
+            # Keep lossless results if there are no good lossy matches
+            if not (allow_lossless and 'flac' in result[0].lower()):
                 continue
 
-            if high_size_limit and (int(result[1]) > high_size_limit):
-                logger.info("%s from %s is too large for this album - not considering it. (Size: %s, Maxsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(high_size_limit))
+        if new:
+            alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [result[2]])
 
-                # Keep lossless results if there are no good lossy matches
-                if not (allow_lossless and 'flac' in result[0].lower()):
-                    continue
+            if len(alreadydownloaded):
+                logger.info('%s has already been downloaded from %s. Skipping.' % (result[0], result[3]))
+                continue
 
-            if new:
-                alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [result[2]])
+        newlist.append(result)
 
-                if len(alreadydownloaded):
-                    logger.info('%s has already been downloaded from %s. Skipping.' % (result[0], result[3]))
-                    continue
-
-            newlist.append(result)
-
-        results = newlist
+    results = newlist
 
     return results