From 9811df2779231cb527da5951c0a68ca31ebbc8ab Mon Sep 17 00:00:00 2001
From: rembo10 <rembo10@users.noreply.github.com>
Date: Thu, 18 Jan 2024 16:01:26 +0530
Subject: [PATCH] Slightly clean up searcher.py, fix regex warnings

---
 headphones/helpers.py      |  7 +++++
 headphones/helpers_test.py | 17 ++++++++++-
 headphones/searcher.py     | 61 ++++++++++++++++++++++++--------------
 3 files changed, 62 insertions(+), 23 deletions(-)

diff --git a/headphones/helpers.py b/headphones/helpers.py
index c8f53c64..3a79d345 100644
--- a/headphones/helpers.py
+++ b/headphones/helpers.py
@@ -1050,3 +1050,10 @@ def have_pct_have_total(db_artist):
     have_pct = have_tracks / total_tracks if total_tracks else 0
     return (have_pct, total_tracks)
 
+
+def has_token(title, token):
+    """Return True if `token` occurs in `title` as a separate word, ignoring case."""
+    # Escape the token so regex metacharacters in it are matched literally,
+    # then require a non-word character or a string edge on both sides.
+    pattern = rf'(?:\W|^)+{re.escape(token)}(?:\W|$)+'
+    return bool(re.search(pattern, title, re.IGNORECASE | re.UNICODE))
diff --git a/headphones/helpers_test.py b/headphones/helpers_test.py
index 16753f91..09a1783c 100644
--- a/headphones/helpers_test.py
+++ b/headphones/helpers_test.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 from .unittestcompat import TestCase
-from headphones.helpers import clean_name, is_valid_date, age
+from headphones.helpers import clean_name, is_valid_date, age, has_token
 
 
 class HelpersTest(TestCase):
@@ -56,3 +56,18 @@ class HelpersTest(TestCase):
         ]
         for input, expected, desc in test_cases:
             self.assertEqual(is_valid_date(input), expected, desc)
+
+    def test_has_token(self):
+        """helpers: has_token()"""
+        test_cases = [
+            ("a cat ran", "cat", True, "return True if token is in string"),
+            ("acatran", "cat", False, "return False if token is part of another word"),
+            ("cats ran", "cat", False, "return False if token is only a prefix"),
+            ("A CAT ran", "cat", True, "match is case-insensitive"),
+            ("cat", "cat", True, "token may span the whole string"),
+            ("the cat.", "cat", True, "punctuation delimits a token"),
+        ]
+        for title, token, expected, desc in test_cases:
+            self.assertEqual(has_token(title, token), expected, desc)
+
+
diff --git a/headphones/searcher.py b/headphones/searcher.py
index 93ec607a..869c1084 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -37,9 +37,27 @@ from unidecode import unidecode
 
 import headphones
 from headphones.common import USER_AGENT
+from headphones.helpers import (
+    bytes_to_mb, 
+    has_token, 
+    piratesize, 
+    replace_all,
+    replace_illegal_chars, 
+    sab_replace_dots, 
+    sab_replace_spaces, 
+    sab_sanitize_foldername,
+    )
 from headphones.types import Result
-from headphones import logger, db, helpers, classes, sab, nzbget, request
-from headphones import utorrent, transmission, notifiers, rutracker, deluge, qbittorrent, bandcamp
+from headphones import logger, db, classes, sab, nzbget, request
+from headphones import (
+    bandcamp,
+    deluge, 
+    notifiers, 
+    qbittorrent, 
+    rutracker, 
+    transmission, 
+    utorrent, 
+    )
 from bencode import bencode, bdecode
 
 # Magnet to torrent services, for Black hole. Stolen from CouchPotato.
@@ -137,7 +155,7 @@ def calculate_torrent_hash(link, data=None):
     """
 
     if link.startswith("magnet:"):
-        torrent_hash = re.findall("urn:btih:([\w]{32,40})", link)[0]
+        torrent_hash = re.findall(r"urn:btih:([\w]{32,40})", link)[0]
         if len(torrent_hash) == 32:
             torrent_hash = b16encode(b32decode(torrent_hash)).lower()
     elif data:
@@ -553,8 +571,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None,
             term = cleanartist + ' ' + cleanalbum
 
     # Replace bad characters in the term
-    term = re.sub('[\.\-\/]', ' ', term)
-    artistterm = re.sub('[\.\-\/]', ' ', cleanartist)
+    term = re.sub(r'[\.\-\/]', r' ', term)
+    artistterm = re.sub(r'[\.\-\/]', r' ', cleanartist)
 
     # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless
     if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE and headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS:
@@ -1172,7 +1190,7 @@ def send_to_downloader(data, result, album):
 
 
 def verifyresult(title, artistterm, term, lossless):
-    title = re.sub('[\.\-\/\_]', ' ', title)
+    title = re.sub(r'[\.\-\/\_]', r' ', title)
 
     # if artistterm != 'Various Artists':
     #
@@ -1235,23 +1253,23 @@ def verifyresult(title, artistterm, term, lossless):
                             title, each_word)
                 return False
 
-    tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE)
+    # Pass flags by keyword: the third positional argument of re.split is maxsplit.
+    tokens = re.split(r'\W', term, flags=re.IGNORECASE | re.UNICODE)
     for token in tokens:
 
         if not token:
             continue
         if token == 'Various' or token == 'Artists' or token == 'VA':
             continue
-        if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE):
+        if not has_token(title, token):
             cleantoken = ''.join(c for c in token if c not in string.punctuation)
-            if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title,
-                                 re.IGNORECASE | re.UNICODE):
+            if not has_token(title, cleantoken):
                 dic = {'!': 'i', '$': 's'}
-                dumbtoken = helpers.replace_all(token, dic)
-                if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title,
-                                     re.IGNORECASE | re.UNICODE):
-                    logger.info("Removed from results: %s (missing tokens: %s and %s)", title,
-                                token, cleantoken)
+                dumbtoken = replace_all(token, dic)
+                if not has_token(title, dumbtoken):
+                    logger.info(
+                        "Removed from results: %s (missing tokens: [%s, %s, %s])",
+                        title, token, cleantoken, dumbtoken)
                     return False
 
     return True
@@ -1309,12 +1327,12 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None,
     else:
         usersearchterm = ''
 
-    semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist)
-    semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum)
+    semi_clean_artist_term = re.sub(r'[\.\-\/]', r' ', semi_cleanartist)
+    semi_clean_album_term = re.sub(r'[\.\-\/]', r' ', semi_cleanalbum)
     # Replace bad characters in the term
-    term = re.sub('[\.\-\/]', ' ', term)
-    artistterm = re.sub('[\.\-\/]', ' ', cleanartist)
-    albumterm = re.sub('[\.\-\/]', ' ', cleanalbum)
+    term = re.sub(r'[\.\-\/]', r' ', term)
+    artistterm = re.sub(r'[\.\-\/]', r' ', cleanartist)
+    albumterm = re.sub(r'[\.\-\/]', r' ', cleanalbum)
 
     # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless
     if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE and headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS:
@@ -1927,15 +1945,14 @@ def preprocess(resultlist):
         if result[4] == 'bandcamp':
             return True, result
 
-        if result[4] == 'torrent':
-
-        if result.provider in ["The Pirate Bay", "Old Pirate Bay"]:
+        if result[4] == 'torrent' and result.provider in ["The Pirate Bay", "Old Pirate Bay"]:
             headers = {
                 'User-Agent':
                     'Mozilla/5.0 (Windows NT 6.3; Win64; x64) \
                     AppleWebKit/537.36 (KHTML, like Gecko) \
                     Chrome/41.0.2243.2 Safari/537.36'
             }
+
         else:
             headers = {'User-Agent': USER_AGENT}