Bandcamp support (#3252)

* Add configuration options for bandcamp * Add bandcamp config options * Add bandcamp config options * Initial crude bandcamp search and download support * Add bandcamp search support * Better utf-8 handling and tagging of the downloads * Post-process bandcamp download directory * Tweak the order of downloads (prevent querying bandcamp too often) * Show [bandcamp] link in the history page * pep8 * Use more sane loglevels * Oops. * Patch regexp to support new bandcamp page structure * Make sure the file-key exists, is not None and contains data
2026-04-14 00:49:28 +01:00 · 2023-11-26 10:43:13 +01:00
parent 4aaeaa704f
commit 73ca787cf1
7 changed files with 221 additions and 9 deletions
--- a/data/interfaces/default/config.html
+++ b/data/interfaces/default/config.html
@@ -310,6 +310,16 @@
                                    <input type="text" name="usenet_retention" value="${config['usenet_retention']}" size="5">
                                </div>
                            </fieldset>
+                            <fieldset title="Method for downloading Bandcamp.com files.">
+                                <legend>Bandcamp</legend>
+                                <div class="row">
+                                    <label title="Path to folder where Headphones can store raw downloads from Bandcamp.com.">
+                                        Bandcamp Directory
+                                    </label>
+                                    <input type="text" name="bandcamp_dir" value="${config['bandcamp_dir']}" size="50">
+                                    <small>Full path where raw MP3s will be stored, e.g. /Users/name/Downloads/bandcamp</small>
+                                </div>
+                            </fieldset>
                        </td>
                        <td>
                            <fieldset title="Method for downloading torrent files.">
@@ -579,6 +589,15 @@
                                    </div>
                                </div>
                            </fieldset>
+
+                            <fieldset>
+                                <legend>Other</legend>
+                                <fieldset>
+                                    <div class="row checkbox left">
+                                        <input id="use_bandcamp" type="checkbox" class="bigcheck" name="use_bandcamp" value="1" ${config['use_bandcamp']} /><label for="use_bandcamp"><span class="option">Bandcamp</span></label>
+                                    </div>
+                                </fieldset>
+                            </fieldset>
                        </td>
                        <td>
                            <fieldset>
--- a/data/interfaces/default/history.html
+++ b/data/interfaces/default/history.html
@@ -56,6 +56,8 @@
                fileid = 'torrent'
            if item['URL'].find('codeshy') != -1:
                fileid = 'nzb'
+            if item['URL'].find('bandcamp') != -1:
+                fileid = 'bandcamp'

            folder = 'Folder: ' + item['FolderName']

--- a/headphones/bandcamp.py
+++ b/headphones/bandcamp.py
@@ -0,0 +1,161 @@
+#  This file is part of Headphones.
+#
+#  Headphones is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  Headphones is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with Headphones.  If not, see <http://www.gnu.org/licenses/>
+
+import headphones
+import json
+import os
+import re
+
+from headphones import logger, helpers, metadata, request
+from headphones.common import USER_AGENT
+
+from beets.mediafile import MediaFile, UnreadableFileError
+from bs4 import BeautifulSoup
+
+
+def search(album, albumlength=None, page=1, resultlist=None):
+    """Search bandcamp.com for an album and collect exact matches.
+
+    Queries https://bandcamp.com/search with the cleaned album title, keeps
+    the results of type "album" whose artist and album names equal the
+    requested ones (case-insensitively, after ASCII folding) and follows
+    the "next" pagination link recursively.
+
+    :param album: mapping providing at least 'AlbumTitle' and 'ArtistName'.
+    :param albumlength: not used for matching; only forwarded to the
+        recursive call for the next page.
+    :param page: result page to request.
+    :param resultlist: list to append matches to; a new list is created
+        when None. The same list is shared by all recursive calls.
+    :return: list of tuples
+        (title, size, url, 'bandcamp', 'bandcamp', True).
+    """
+    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
+           '"': '', ',': '', '*': '', '.': '', ':': ''}
+    if resultlist is None:
+        resultlist = []
+
+    cleanalbum = helpers.latinToAscii(
+        helpers.replace_all(album['AlbumTitle'], dic)
+        ).strip()
+    cleanartist = helpers.latinToAscii(
+        helpers.replace_all(album['ArtistName'], dic)
+        ).strip()
+
+    headers = {'User-Agent': USER_AGENT}
+    params = {
+        "page": page,
+        "q": cleanalbum,
+    }
+    logger.info("Looking up https://bandcamp.com/search with {}".format(
+        params))
+    content = request.request_content(
+        url='https://bandcamp.com/search',
+        params=params,
+        headers=headers
+        )
+    # NOTE(review): what request_content returns on failure is not visible
+    # here; a falsy response cannot be decoded or parsed, so stop with the
+    # results gathered so far instead of crashing on .decode().
+    if not content:
+        logger.warn("No response from bandcamp search (page {})".format(page))
+        return resultlist
+    soup = BeautifulSoup(content.decode('utf8'), "html5lib")
+
+    for item in soup.find_all("li", class_="searchresult"):
+        itemtype = item.find('div', class_='itemtype')
+        if itemtype is None or itemtype.text.strip().lower() != "album":
+            continue
+
+        try:
+            data = parse_album(item)
+        except (AttributeError, KeyError, TypeError, ValueError) as e:
+            # One malformed search result must not abort the whole search.
+            logger.debug("Skipping unparsable bandcamp result: {}".format(e))
+            continue
+
+        cleanartist_found = helpers.latinToAscii(data['artist'])
+        cleanalbum_found = helpers.latinToAscii(data['album'])
+
+        logger.debug(u"{} - {}".format(data['album'], cleanalbum_found))
+
+        logger.debug("Comparing {} to {}".format(
+            cleanalbum, cleanalbum_found))
+        if (cleanartist.lower() == cleanartist_found.lower() and
+                cleanalbum.lower() == cleanalbum_found.lower()):
+            resultlist.append((
+                data['title'], data['size'], data['url'],
+                'bandcamp', 'bandcamp', True))
+
+    if soup.find('a', class_='next'):
+        page += 1
+        logger.debug("Calling next page ({})".format(page))
+        # The recursive call appends to the shared resultlist, so its
+        # return value does not need to be merged.
+        search(album, albumlength=albumlength,
+               page=page, resultlist=resultlist)
+
+    return resultlist
+
+
+def download(album, bestqual):
+    """Download the 'mp3-128' streams of a bandcamp album and tag them.
+
+    Fetches the album page at bestqual[2], extracts the embedded
+    "trackinfo" JSON, stores every track that offers an 'mp3-128' stream in
+    "<BANDCAMP_DIR>/<artist> - <album>" and writes basic tags.
+
+    :param album: mapping providing 'ArtistName' and 'AlbumTitle'; it is
+        also passed to metadata._date_year() to obtain the year tag.
+    :param bestqual: search result; index 2 is the album page URL.
+    :return: the target directory (returned even when no track could be
+        downloaded).
+    """
+    page = request.request_content(url=bestqual[2])
+    # NOTE(review): the failure value of request_content is not visible
+    # here; treat a falsy response as an empty page.
+    html = page.decode('utf-8') if page else ''
+
+    trackinfo = []
+    match = re.search(r"trackinfo&quot;:(\[.*?\]),", html)
+    if match is None:
+        # Without this check .group(1) raises AttributeError, which the
+        # ValueError handler below would not catch.
+        logger.warn("Couldn't find trackinfo on {}".format(bestqual[2]))
+    else:
+        try:
+            trackinfo = json.loads(match.group(1).replace('&quot;', '"'))
+        except ValueError as e:
+            logger.warn("Couldn't load json: {}".format(e))
+
+    directory = os.path.join(
+        headphones.CONFIG.BANDCAMP_DIR,
+        u'{} - {}'.format(
+            album['ArtistName'].replace('/', '_'),
+            album['AlbumTitle'].replace('/', '_')))
+    directory = helpers.latinToAscii(directory)
+
+    if not os.path.exists(directory):
+        try:
+            os.makedirs(directory)
+        except Exception as e:
+            logger.warn("Could not create directory ({})".format(e))
+
+    # The running index (not the track's own number) names the file.
+    for index, track in enumerate(trackinfo, 1):
+        filename = helpers.replace_illegal_chars(
+                    u'{:02d} - {}.mp3'.format(index, track['title']))
+        fullname = os.path.join(directory.encode('utf-8'),
+                                filename.encode('utf-8'))
+        logger.debug("Downloading to {}".format(fullname))
+
+        # The 'file' key may be missing, None or lack the mp3-128 stream.
+        files = track.get('file')
+        if not files or 'mp3-128' not in files:
+            continue
+
+        content = request.request_content(files['mp3-128'])
+        if not content:
+            logger.warn("Couldn't download {}".format(files['mp3-128']))
+            continue
+
+        # Close the file before MediaFile reopens it for tagging.
+        with open(fullname, 'wb') as target:
+            target.write(content)
+
+        try:
+            audio = MediaFile(fullname)
+            _, year = metadata._date_year(album)
+            audio.update({
+                'artist': album['ArtistName'].encode('utf-8'),
+                'album': album['AlbumTitle'].encode('utf-8'),
+                'title': track['title'].encode('utf-8'),
+                'track': track['track_num'],
+                'tracktotal': len(trackinfo),
+                'year': year,
+            })
+            audio.save()
+        except UnreadableFileError as ex:
+            logger.warn("MediaFile couldn't parse: %s (%s)",
+                        fullname,
+                        str(ex))
+
+    return directory
+
+
+def parse_album(item):
+    """Extract album data from one bandcamp search result element.
+
+    :param item: parsed search result element exposing find(); it must
+        contain the 'heading', 'subhead', 'released' and 'length' divs.
+    :return: dict with the keys 'title' ("artist - album [year]"),
+        'artist', 'album', 'url' (query string removed) and 'size'
+        (estimated size in bytes).
+    :raises AttributeError: when an expected element or the release year
+        is missing.
+    :raises ValueError: when the length text is not "<tracks>, <minutes>".
+    """
+    heading = item.find('div', class_='heading')
+    album = heading.text.strip()
+
+    artist = item.find('div', class_='subhead').text.strip()
+    # Drop only the leading "by " label; a plain replace() would also eat
+    # "by " inside the artist name itself.
+    if artist.startswith("by "):
+        artist = artist[len("by "):]
+
+    released = item.find('div', class_='released').text.strip().replace(
+        "released ", "")
+    year = re.search(r"(\d{4})", released).group(1)
+
+    url = heading.find('a')['href'].split("?")[0]
+
+    length = item.find('div', class_='length').text.strip()
+    _tracks, minutes = length.split(",")
+    # Handle the singular form as well ("1 minute").
+    minutes = minutes.replace(" minutes", "").replace(" minute", "").strip()
+    # bandcamp offers 128 kbit/s MP3, which should be about 960KB/minute
+    size = int(minutes) * 983040
+
+    data = {"title": u'{} - {} [{}]'.format(artist, album, year),
+            "artist": artist, "album": album,
+            "url": url, "size": size}
+
+    return data
--- a/headphones/config.py
+++ b/headphones/config.py
@@ -317,7 +317,9 @@ _CONFIG_DEFINITIONS = {
    'XBMC_PASSWORD': (str, 'XBMC', ''),
    'XBMC_UPDATE': (int, 'XBMC', 0),
    'XBMC_USERNAME': (str, 'XBMC', ''),
-    'XLDPROFILE': (str, 'General', '')
+    'XLDPROFILE': (str, 'General', ''),
+    'BANDCAMP': (int, 'General', 1),
+    'BANDCAMP_DIR': (path, 'General', '')
 }


--- a/headphones/postprocessor.py
+++ b/headphones/postprocessor.py
@@ -48,6 +48,8 @@ def checkFolder():
                single = False
                if album['Kind'] == 'nzb':
                    download_dir = headphones.CONFIG.DOWNLOAD_DIR
+                elif album['Kind'] == 'bandcamp':
+                    download_dir = headphones.CONFIG.BANDCAMP_DIR
                else:
                    if headphones.CONFIG.DELUGE_DONE_DIRECTORY and headphones.CONFIG.TORRENT_DOWNLOADER == 3:
                        download_dir = headphones.CONFIG.DELUGE_DONE_DIRECTORY
@@ -1171,7 +1173,11 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig
        if headphones.CONFIG.DOWNLOAD_DIR and not dir:
            download_dirs.append(headphones.CONFIG.DOWNLOAD_DIR)
        if headphones.CONFIG.DOWNLOAD_TORRENT_DIR and not dir:
-            download_dirs.append(headphones.CONFIG.DOWNLOAD_TORRENT_DIR)
+            download_dirs.append(
+                headphones.CONFIG.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace'))
+        if headphones.CONFIG.BANDCAMP and not dir:
+            download_dirs.append(
+                headphones.CONFIG.BANDCAMP_DIR.encode(headphones.SYS_ENCODING, 'replace'))

        # If DOWNLOAD_DIR and DOWNLOAD_TORRENT_DIR are the same, remove the duplicate to prevent us from trying to process the same folder twice.
        download_dirs = list(set(download_dirs))
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -39,8 +39,8 @@ import headphones
 from headphones.common import USER_AGENT
 from headphones.types import Result
 from headphones import logger, db, helpers, classes, sab, nzbget, request
-from headphones import utorrent, transmission, notifiers, rutracker, deluge, qbittorrent
-
+from headphones import utorrent, transmission, notifiers, rutracker, deluge, qbittorrent, bandcamp
+from bencode import bencode, bdecode

 # Magnet to torrent services, for Black hole. Stolen from CouchPotato.
 TORRENT_TO_MAGNET_SERVICES = [
@@ -284,25 +284,29 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False):
                              [album['AlbumID']])[0][0]

    if headphones.CONFIG.PREFER_TORRENTS == 0 and not choose_specific_download:
-
        if NZB_PROVIDERS and NZB_DOWNLOADERS:
            results = searchNZB(album, new, losslessOnly, albumlength)

        if not results and TORRENT_PROVIDERS:
            results = searchTorrent(album, new, losslessOnly, albumlength)

-    elif headphones.CONFIG.PREFER_TORRENTS == 1 and not choose_specific_download:
+        if not results and headphones.CONFIG.BANDCAMP:
+            results = searchBandcamp(album, new, albumlength)

+    elif headphones.CONFIG.PREFER_TORRENTS == 1 and not choose_specific_download:
        if TORRENT_PROVIDERS:
            results = searchTorrent(album, new, losslessOnly, albumlength)

        if not results and NZB_PROVIDERS and NZB_DOWNLOADERS:
            results = searchNZB(album, new, losslessOnly, albumlength)

+        if not results and headphones.CONFIG.BANDCAMP:
+            results = searchBandcamp(album, new, albumlength)
    else:

        nzb_results = None
        torrent_results = None
+        bandcamp_results = None

        if NZB_PROVIDERS and NZB_DOWNLOADERS:
            nzb_results = searchNZB(album, new, losslessOnly, albumlength, choose_specific_download)
@@ -311,13 +315,16 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False):
            torrent_results = searchTorrent(album, new, losslessOnly, albumlength,
                                            choose_specific_download)

+        if headphones.CONFIG.BANDCAMP:
+            bandcamp_results = searchBandcamp(album, new, albumlength)
+
        if not nzb_results:
            nzb_results = []

        if not torrent_results:
            torrent_results = []

-        results = nzb_results + torrent_results
+        results = nzb_results + torrent_results + bandcamp_results

    if choose_specific_download:
        return results
@@ -502,6 +509,10 @@ def get_year_from_release_date(release_date):
    return year


+def searchBandcamp(album, new=False, albumlength=None):
+    """Search bandcamp for ``album``.
+
+    ``new`` is accepted so the signature resembles the other search
+    helpers but is not used. ``albumlength`` is forwarded to
+    bandcamp.search() instead of being dropped.
+    """
+    return bandcamp.search(album, albumlength=albumlength)
+
+
 def searchNZB(album, new=False, losslessOnly=False, albumlength=None,
              choose_specific_download=False):
    reldate = album['ReleaseDate']
@@ -839,6 +850,11 @@ def send_to_downloader(data, result, album):
            except Exception as e:
                logger.error('Couldn\'t write NZB file: %s', e)
                return
+
+    elif kind == 'bandcamp':
+        folder_name = bandcamp.download(album, bestqual)
+        logger.info("Setting folder_name to: {}".format(folder_name))
+
    else:
        folder_name = '%s - %s [%s]' % (
            unidecode(album['ArtistName']).replace('/', '_'),
@@ -1906,6 +1922,10 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None,

 def preprocess(resultlist):
    for result in resultlist:
+        if result[4] == 'bandcamp':
+            return True, result
+
+        if result[4] == 'torrent':

        if result.provider in ["The Pirate Bay", "Old Pirate Bay"]:
            headers = {
--- a/headphones/webserve.py
+++ b/headphones/webserve.py
@@ -1413,7 +1413,9 @@ class WebInterface(object):
            "join_enabled": checked(headphones.CONFIG.JOIN_ENABLED),
            "join_onsnatch": checked(headphones.CONFIG.JOIN_ONSNATCH),
            "join_apikey": headphones.CONFIG.JOIN_APIKEY,
-            "join_deviceid": headphones.CONFIG.JOIN_DEVICEID
+            "join_deviceid": headphones.CONFIG.JOIN_DEVICEID,
+            "use_bandcamp": checked(headphones.CONFIG.BANDCAMP),
+            "bandcamp_dir": headphones.CONFIG.BANDCAMP_DIR
        }

        for k, v in config.items():
@@ -1482,7 +1484,7 @@ class WebInterface(object):
            "songkick_enabled", "songkick_filter_enabled",
            "mpc_enabled", "email_enabled", "email_ssl", "email_tls", "email_onsnatch",
            "customauth", "idtag", "deluge_paused",
-            "join_enabled", "join_onsnatch"
+            "join_enabled", "join_onsnatch", "use_bandcamp"
        ]
        for checked_config in checked_configs:
            if checked_config not in kwargs: