Bandcamp support (#3252)

* Add configuration options for bandcamp

* Add bandcamp config options

* Add bandcamp config options

* Initial crude bandcamp search and download support

* Add bandcamp search support

* Better utf-8 handling and tagging of the downloads

* Post-process bandcamp download directory

* Tweak the order of downloads (prevent querying bandcamp too often)

* Show [bandcamp] link in the history page

* pep8

* Use more sane loglevels

* Oops.

* Patch regexp to support new bandcamp page structure

* Make sure the file-key exists, is not None and contains data
This commit is contained in:
Menno Blom
2023-11-26 10:43:13 +01:00
committed by GitHub
parent 4aaeaa704f
commit 73ca787cf1
7 changed files with 221 additions and 9 deletions

View File

@@ -310,6 +310,16 @@
<input type="text" name="usenet_retention" value="${config['usenet_retention']}" size="5">
</div>
</fieldset>
<fieldset title="Method for downloading Bandcamp.com files.">
<legend>Bandcamp</legend>
<div class="row">
<label title="Path to folder where Headphones can store raw downloads from Bandcamp.com.">
Bandcamp Directory
</label>
<input type="text" name="bandcamp_dir" value="${config['bandcamp_dir']}" size="50">
<small>Full path where raw MP3s will be stored, e.g. /Users/name/Downloads/bandcamp</small>
</div>
</fieldset>
</td>
<td>
<fieldset title="Method for downloading torrent files.">
@@ -579,6 +589,15 @@
</div>
</div>
</fieldset>
<fieldset>
<legend>Other</legend>
<fieldset>
<div class="row checkbox left">
<input id="use_bandcamp" type="checkbox" class="bigcheck" name="use_bandcamp" value="1" ${config['use_bandcamp']} /><label for="use_bandcamp"><span class="option">Bandcamp</span></label>
</div>
</fieldset>
</fieldset>
</td>
<td>
<fieldset>

View File

@@ -56,6 +56,8 @@
fileid = 'torrent'
if item['URL'].find('codeshy') != -1:
fileid = 'nzb'
if item['URL'].find('bandcamp') != -1:
fileid = 'bandcamp'
folder = 'Folder: ' + item['FolderName']

161
headphones/bandcamp.py Normal file
View File

@@ -0,0 +1,161 @@
# This file is part of Headphones.
#
# Headphones is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Headphones is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Headphones. If not, see <http://www.gnu.org/licenses/>
import headphones
import json
import os
import re
from headphones import logger, helpers, metadata, request
from headphones.common import USER_AGENT
from beets.mediafile import MediaFile, UnreadableFileError
from bs4 import BeautifulSoup
def search(album, albumlength=None, page=1, resultlist=None):
    """Search bandcamp.com for an album and collect exact artist/title matches.

    The album title is sent as the search query. Every search result of type
    "album" is parsed with ``parse_album`` and kept only when both its artist
    and its title equal the wanted ones (case-insensitively, after the same
    character clean-up has been applied to both sides). When the result page
    contains a "next" link, the following page is fetched recursively and its
    matches are appended to the same list.

    :param album: mapping providing at least ``'AlbumTitle'`` and
        ``'ArtistName'``.
    :param albumlength: not used for matching; only forwarded to the
        recursive call.
    :param page: number of the result page to request.
    :param resultlist: list to append matches to; a new list is created when
        ``None``.
    :returns: ``resultlist``, holding one
        ``(title, size, url, 'bandcamp', 'bandcamp', True)`` tuple per match.
    """
    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
           '"': '', ',': '', '*': '', '.': '', ':': ''}
    if resultlist is None:
        resultlist = []

    cleanalbum = helpers.latinToAscii(
        helpers.replace_all(album['AlbumTitle'], dic)
    ).strip()
    cleanartist = helpers.latinToAscii(
        helpers.replace_all(album['ArtistName'], dic)
    ).strip()

    headers = {'User-Agent': USER_AGENT}
    params = {
        "page": page,
        "q": cleanalbum,
    }
    logger.info("Looking up https://bandcamp.com/search with {}".format(
        params))
    content = request.request_content(
        url='https://bandcamp.com/search',
        params=params,
        headers=headers
    )
    # NOTE(review): request_content presumably returns None when the request
    # fails - confirm. Without this guard the .decode() below would raise.
    if content is None:
        logger.warn("No response from https://bandcamp.com/search")
        return resultlist

    soup = BeautifulSoup(content.decode('utf8'), "html5lib")

    for item in soup.find_all("li", class_="searchresult"):
        itemtype = item.find('div', class_='itemtype')
        if itemtype is None or itemtype.text.strip().lower() != "album":
            continue

        # One result with unexpected markup must not abort the whole search.
        try:
            data = parse_album(item)
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            logger.warn("Skipping unparsable bandcamp result: {}".format(e))
            continue

        # Normalise the scraped names exactly like the wanted ones above;
        # otherwise any name containing a character from `dic` (".", ",",
        # " & ", ...) could never compare equal.
        cleanartist_found = helpers.latinToAscii(
            helpers.replace_all(data['artist'], dic)
        ).strip()
        cleanalbum_found = helpers.latinToAscii(
            helpers.replace_all(data['album'], dic)
        ).strip()

        logger.debug(u"{} - {}".format(data['album'], cleanalbum_found))
        logger.debug("Comparing {} to {}".format(
            cleanalbum, cleanalbum_found))

        if (cleanartist.lower() == cleanartist_found.lower() and
                cleanalbum.lower() == cleanalbum_found.lower()):
            resultlist.append((
                data['title'], data['size'], data['url'],
                'bandcamp', 'bandcamp', True))

    if soup.find('a', class_='next'):
        page += 1
        logger.debug("Calling next page ({})".format(page))
        # The recursive call appends to the shared resultlist in place.
        search(album, albumlength=albumlength,
               page=page, resultlist=resultlist)

    return resultlist
def download(album, bestqual):
    """Download the MP3-128 streams of a Bandcamp album and tag them.

    The album page at ``bestqual[2]`` is fetched and the ``trackinfo`` JSON
    array embedded in it is extracted. Every track that offers an
    ``'mp3-128'`` file is saved as ``"NN - <title>.mp3"`` inside
    ``<BANDCAMP_DIR>/<artist> - <album>`` and tagged with artist, album,
    title, track number, track total and year.

    :param album: mapping providing at least ``'ArtistName'`` and
        ``'AlbumTitle'``; it is also handed to ``metadata._date_year``.
    :param bestqual: search result whose element at index 2 is the album URL.
    :returns: the target directory path. It is returned even when no track
        could be downloaded.
    """
    trackinfo = []
    page = request.request_content(url=bestqual[2])
    # NOTE(review): request_content presumably returns None when the request
    # fails - confirm. Without this guard the .decode() below would raise.
    if page is None:
        logger.warn("Couldn't fetch album page {}".format(bestqual[2]))
    else:
        html = page.decode('utf-8')
        # re.search returns None when the page layout has changed; calling
        # .group() on it would raise AttributeError, which the ValueError
        # handler below does not cover.
        match = re.search(r"trackinfo&quot;:(\[.*?\]),", html)
        if match is None:
            logger.warn("Couldn't find trackinfo on {}".format(bestqual[2]))
        else:
            try:
                trackinfo = json.loads(
                    match.group(1).replace('&quot;', '"'))
            except ValueError as e:
                logger.warn("Couldn't load json: {}".format(e))

    directory = os.path.join(
        headphones.CONFIG.BANDCAMP_DIR,
        u'{} - {}'.format(
            album['ArtistName'].replace('/', '_'),
            album['AlbumTitle'].replace('/', '_')))
    directory = helpers.latinToAscii(directory)

    if not os.path.exists(directory):
        try:
            os.makedirs(directory)
        except Exception as e:
            logger.warn("Could not create directory ({})".format(e))

    # Numbering counts every listed track, including ones that get skipped.
    for index, track in enumerate(trackinfo, start=1):
        filename = helpers.replace_illegal_chars(
            u'{:02d} - {}.mp3'.format(index, track['title']))
        fullname = os.path.join(directory.encode('utf-8'),
                                filename.encode('utf-8'))
        logger.debug("Downloading to {}".format(fullname))

        # The 'file' key may be absent, None or empty for some tracks.
        files = track.get('file')
        if not files or 'mp3-128' not in files:
            continue

        content = request.request_content(files['mp3-128'])
        if not content:
            logger.warn("No data received for {}".format(fullname))
            continue

        # Close the file before MediaFile re-opens it for tagging.
        with open(fullname, 'wb') as handle:
            handle.write(content)

        try:
            f = MediaFile(fullname)
            _, year = metadata._date_year(album)
            f.update({
                'artist': album['ArtistName'].encode('utf-8'),
                'album': album['AlbumTitle'].encode('utf-8'),
                'title': track['title'].encode('utf-8'),
                'track': track['track_num'],
                'tracktotal': len(trackinfo),
                'year': year,
            })
            f.save()
        except UnreadableFileError as ex:
            logger.warn("MediaFile couldn't parse: %s (%s)",
                        fullname,
                        str(ex))

    return directory
def parse_album(item):
    """Extract album details from one Bandcamp search-result element.

    :param item: search-result element exposing ``find(name, class_=...)``;
        the divs read are ``heading``, ``subhead``, ``released`` and
        ``length``.
    :returns: dict with the keys ``title`` ("artist - album [year]", or
        "artist - album" when no year is found), ``artist``, ``album``,
        ``url`` (heading link without its query string) and ``size``
        (estimated bytes; 0 when the length is not available).
    :raises AttributeError, TypeError, KeyError: when the heading div or its
        link is missing, since no usable URL can be produced then.
    """
    def div_text(class_name):
        # Stripped text of the named div, or '' when the div is absent.
        tag = item.find('div', class_=class_name)
        return tag.text.strip() if tag is not None else ''

    heading = item.find('div', class_='heading')
    album = heading.text.strip()
    url = heading.find('a')['href'].split("?")[0]

    # Strip only the leading "by "; a blanket replace would also eat "by "
    # inside the artist name itself (e.g. "Baby Blue").
    artist = div_text('subhead')
    if artist.startswith("by "):
        artist = artist[len("by "):].strip()

    year_match = re.search(r"(\d{4})", div_text('released'))
    year = year_match.group(1) if year_match else ''

    # The length reads like "10 tracks, 45 minutes"; accept the singular
    # "minute" as well.
    minutes_match = re.search(r"(\d+)\s*minute", div_text('length'))
    minutes = int(minutes_match.group(1)) if minutes_match else 0

    # Bandcamp streams 128 kbit/s MP3, which is roughly 960 KB per minute
    # (983040 = 960 * 1024).
    size = minutes * 983040

    if year:
        title = u'{} - {} [{}]'.format(artist, album, year)
    else:
        title = u'{} - {}'.format(artist, album)

    data = {"title": title,
            "artist": artist, "album": album,
            "url": url, "size": size}
    return data

View File

@@ -317,7 +317,9 @@ _CONFIG_DEFINITIONS = {
'XBMC_PASSWORD': (str, 'XBMC', ''),
'XBMC_UPDATE': (int, 'XBMC', 0),
'XBMC_USERNAME': (str, 'XBMC', ''),
'XLDPROFILE': (str, 'General', '')
'XLDPROFILE': (str, 'General', ''),
'BANDCAMP': (int, 'General', 1),
'BANDCAMP_DIR': (path, 'General', '')
}

View File

@@ -48,6 +48,8 @@ def checkFolder():
single = False
if album['Kind'] == 'nzb':
download_dir = headphones.CONFIG.DOWNLOAD_DIR
elif album['Kind'] == 'bandcamp':
download_dir = headphones.CONFIG.BANDCAMP_DIR
else:
if headphones.CONFIG.DELUGE_DONE_DIRECTORY and headphones.CONFIG.TORRENT_DOWNLOADER == 3:
download_dir = headphones.CONFIG.DELUGE_DONE_DIRECTORY
@@ -1171,7 +1173,11 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig
if headphones.CONFIG.DOWNLOAD_DIR and not dir:
download_dirs.append(headphones.CONFIG.DOWNLOAD_DIR)
if headphones.CONFIG.DOWNLOAD_TORRENT_DIR and not dir:
download_dirs.append(headphones.CONFIG.DOWNLOAD_TORRENT_DIR)
download_dirs.append(
headphones.CONFIG.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace'))
if headphones.CONFIG.BANDCAMP and not dir:
download_dirs.append(
headphones.CONFIG.BANDCAMP_DIR.encode(headphones.SYS_ENCODING, 'replace'))
# If DOWNLOAD_DIR and DOWNLOAD_TORRENT_DIR are the same, remove the duplicate to prevent us from trying to process the same folder twice.
download_dirs = list(set(download_dirs))

View File

@@ -39,8 +39,8 @@ import headphones
from headphones.common import USER_AGENT
from headphones.types import Result
from headphones import logger, db, helpers, classes, sab, nzbget, request
from headphones import utorrent, transmission, notifiers, rutracker, deluge, qbittorrent
from headphones import utorrent, transmission, notifiers, rutracker, deluge, qbittorrent, bandcamp
from bencode import bencode, bdecode
# Magnet to torrent services, for Black hole. Stolen from CouchPotato.
TORRENT_TO_MAGNET_SERVICES = [
@@ -284,25 +284,29 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False):
[album['AlbumID']])[0][0]
if headphones.CONFIG.PREFER_TORRENTS == 0 and not choose_specific_download:
if NZB_PROVIDERS and NZB_DOWNLOADERS:
results = searchNZB(album, new, losslessOnly, albumlength)
if not results and TORRENT_PROVIDERS:
results = searchTorrent(album, new, losslessOnly, albumlength)
elif headphones.CONFIG.PREFER_TORRENTS == 1 and not choose_specific_download:
if not results and headphones.CONFIG.BANDCAMP:
results = searchBandcamp(album, new, albumlength)
elif headphones.CONFIG.PREFER_TORRENTS == 1 and not choose_specific_download:
if TORRENT_PROVIDERS:
results = searchTorrent(album, new, losslessOnly, albumlength)
if not results and NZB_PROVIDERS and NZB_DOWNLOADERS:
results = searchNZB(album, new, losslessOnly, albumlength)
if not results and headphones.CONFIG.BANDCAMP:
results = searchBandcamp(album, new, albumlength)
else:
nzb_results = None
torrent_results = None
bandcamp_results = None
if NZB_PROVIDERS and NZB_DOWNLOADERS:
nzb_results = searchNZB(album, new, losslessOnly, albumlength, choose_specific_download)
@@ -311,13 +315,16 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False):
torrent_results = searchTorrent(album, new, losslessOnly, albumlength,
choose_specific_download)
if headphones.CONFIG.BANDCAMP:
bandcamp_results = searchBandcamp(album, new, albumlength)
if not nzb_results:
nzb_results = []
if not torrent_results:
torrent_results = []
results = nzb_results + torrent_results
results = nzb_results + torrent_results + bandcamp_results
if choose_specific_download:
return results
@@ -502,6 +509,10 @@ def get_year_from_release_date(release_date):
return year
def searchBandcamp(album, new=False, albumlength=None):
    """Look the album up on Bandcamp and return the matching results.

    ``new`` and ``albumlength`` are accepted but are not used by this
    wrapper; only ``album`` is handed to ``bandcamp.search``.
    """
    matches = bandcamp.search(album)
    return matches
def searchNZB(album, new=False, losslessOnly=False, albumlength=None,
choose_specific_download=False):
reldate = album['ReleaseDate']
@@ -839,6 +850,11 @@ def send_to_downloader(data, result, album):
except Exception as e:
logger.error('Couldn\'t write NZB file: %s', e)
return
elif kind == 'bandcamp':
folder_name = bandcamp.download(album, bestqual)
logger.info("Setting folder_name to: {}".format(folder_name))
else:
folder_name = '%s - %s [%s]' % (
unidecode(album['ArtistName']).replace('/', '_'),
@@ -1906,6 +1922,10 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None,
def preprocess(resultlist):
for result in resultlist:
if result[4] == 'bandcamp':
return True, result
if result[4] == 'torrent':
if result.provider in ["The Pirate Bay", "Old Pirate Bay"]:
headers = {

View File

@@ -1413,7 +1413,9 @@ class WebInterface(object):
"join_enabled": checked(headphones.CONFIG.JOIN_ENABLED),
"join_onsnatch": checked(headphones.CONFIG.JOIN_ONSNATCH),
"join_apikey": headphones.CONFIG.JOIN_APIKEY,
"join_deviceid": headphones.CONFIG.JOIN_DEVICEID
"join_deviceid": headphones.CONFIG.JOIN_DEVICEID,
"use_bandcamp": checked(headphones.CONFIG.BANDCAMP),
"bandcamp_dir": headphones.CONFIG.BANDCAMP_DIR
}
for k, v in config.items():
@@ -1482,7 +1484,7 @@ class WebInterface(object):
"songkick_enabled", "songkick_filter_enabled",
"mpc_enabled", "email_enabled", "email_ssl", "email_tls", "email_onsnatch",
"customauth", "idtag", "deluge_paused",
"join_enabled", "join_onsnatch"
"join_enabled", "join_onsnatch", "use_bandcamp"
]
for checked_config in checked_configs:
if checked_config not in kwargs: