mirror of
https://github.com/rembo10/headphones.git
synced 2026-04-18 19:09:28 +01:00
Merge remote-tracking branch 'andrzejc/clean-name' into develop
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# This file is part of Headphones.
|
||||
#
|
||||
# Headphones is free software: you can redistribute it and/or modify
|
||||
@@ -219,12 +220,104 @@ def replace_illegal_chars(string, type="file"):
|
||||
return string
|
||||
|
||||
|
||||
def cleanName(string):
|
||||
pass1 = latinToAscii(string).lower()
|
||||
out_string = re.sub('[\.\-\/\!\@\#\$\%\^\&\*\(\)\+\-\"\'\,\;\:\[\]\{\}\<\>\=\_]', '',
|
||||
pass1).encode('utf-8')
|
||||
_CN_RE1 = re.compile(ur'[^\w]+', re.UNICODE)
|
||||
_CN_RE2 = re.compile(ur'[\s_]+', re.UNICODE)
|
||||
|
||||
return out_string
|
||||
|
||||
_XLATE_GRAPHICAL_AND_DIACRITICAL = {
|
||||
# Translation table.
|
||||
# Covers the following letters, for which NFD fails because of lack of
|
||||
# combining character:
|
||||
# ©ª«®²³¹»¼½¾ÆÐØÞßæðøþĐđĦħıIJijĸĿŀŁłŒœŦŧDŽDždžLJLjljNJNjnjǤǥDZDzdzȤȥ. This
|
||||
# includes also some graphical symbols which can be easily replaced and
|
||||
# usually are written by people who don't have appropriate keyboard layout.
|
||||
u'©': '(C)', u'ª': 'a.', u'«': '<<', u'®': '(R)', u'²': '2', u'³': '3',
|
||||
u'¹': '1', u'»': '>>', u'¼': ' 1/4 ', u'½': ' 1/2 ', u'¾': ' 3/4 ',
|
||||
u'Æ': 'AE', u'Ð': 'D', u'Ø': 'O', u'Þ': 'Th', u'ß': 'ss', u'æ': 'ae',
|
||||
u'ð': 'd', u'ø': 'o', u'þ': 'th', u'Đ': 'D', u'đ': 'd', u'Ħ': 'H',
|
||||
u'ħ': 'h', u'ı': 'i', u'IJ': 'IJ', u'ij': 'ij', u'ĸ': 'q', u'Ŀ': 'L',
|
||||
u'ŀ': 'l', u'Ł': 'L', u'ł': 'l', u'Œ': 'OE', u'œ': 'oe', u'Ŧ': 'T',
|
||||
u'ŧ': 't', u'DŽ': 'DZ', u'Dž': 'Dz', u'LJ': 'LJ', u'Lj': 'Lj',
|
||||
u'lj': 'lj', u'NJ': 'NJ', u'Nj': 'Nj', u'nj': 'nj',
|
||||
u'Ǥ': 'G', u'ǥ': 'g', u'DZ': 'DZ', u'Dz': 'Dz', u'dz': 'dz',
|
||||
u'Ȥ': 'Z', u'ȥ': 'z', u'№': 'No.',
|
||||
u'º': 'o.', # normalize Nº abbrev (popular w/ classical music),
|
||||
# this is the 'masculine ordinal indicator' (U+00BA), not the degree sign
|
||||
}
|
||||
|
||||
_XLATE_SPECIAL = {
|
||||
# Translation table.
|
||||
# Cover additional special characters processing normalization.
|
||||
u"'": '', # replace apostrophe with nothing
|
||||
u'&': ' and ', # expand & to ' and '
|
||||
}
|
||||
|
||||
|
||||
def _translate(s, dictionary):
|
||||
# type: (basestring,Mapping[basestring,basestring])->basestring
|
||||
return ''.join(dictionary.get(x, x) for x in s)
|
||||
|
||||
|
||||
_COMBINING_RANGES = (
|
||||
(0x0300, 0x036f), # Combining Diacritical Marks
|
||||
(0x1ab0, 0x1aff), # Combining Diacritical Marks Extended
|
||||
(0x20d0, 0x20ff), # Combining Diacritical Marks for Symbols
|
||||
(0x1dc0, 0x1dff) # Combining Diacritical Marks Supplement
|
||||
)
|
||||
|
||||
|
||||
def _is_unicode_combining(u):
|
||||
# type: (unicode)->bool
|
||||
"""
|
||||
Check if input unicode is combining diacritical mark.
|
||||
"""
|
||||
i = ord(u)
|
||||
for r in _COMBINING_RANGES:
|
||||
if r[0] <= i <= r[1]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _transliterate(u, xlate):
    # type: (unicode, Mapping[unicode, basestring]) -> unicode
    """Strip diacritics from ``u`` and apply the ``xlate`` substitutions.

    The text is decomposed with NFD normalization so that accents become
    separate combining characters, those combining characters are dropped,
    and the remaining characters are run through ``_translate`` with
    ``xlate``.

    :param u: unicode text to transliterate.
    :param xlate: character translation table handed to ``_translate``.
    :return: the transliterated text as ``unicode``.
    """
    decomposed = unicodedata.normalize('NFD', u)
    stripped = u''.join(
        char for char in decomposed if not _is_unicode_combining(char)
    )
    translated = _translate(stripped, xlate)
    # at this point the output is either unicode or plain ascii, so the
    # conversion below cannot hit a decoding problem
    return unicode(translated)
|
||||
|
||||
|
||||
def clean_name(s):
    # type: (basestring)->unicode
    """Remove non-alphanumeric characters from the string, perform
    normalization and substitution of some special characters; coalesce
    spaces and lowercase the result.

    :param s: string to clean up, possibly unicode one.
    :return: cleaned-up, lowercased version of input string.
    """
    if not isinstance(s, unicode):
        # a narrow (byte) string is decoded as ascii and anything outside
        # ascii is replaced; pass unicode for meaningful non-ascii text
        u = unicode(s, 'ascii', 'replace')
    else:
        u = s
    # 1. don't bother doing normalization NFKC, rather transliterate
    # using special translation table
    u = _transliterate(u, _XLATE_GRAPHICAL_AND_DIACRITICAL)
    # 2. normalize NFKC the result
    u = unicodedata.normalize('NFKC', u)
    # 3. translate specials
    u = _translate(u, _XLATE_SPECIAL)
    # 4. replace any non-alphanumeric character sequences by spaces
    u = _CN_RE1.sub(u' ', u)
    # 5. coalesce interleaved space/underscore sequences
    u = _CN_RE2.sub(u' ', u)
    # 6. trim
    u = u.strip()
    # 7. lowercase; this step was announced but never executed, which left
    # the result case-sensitive for callers that do not call .lower()
    # themselves
    return u.lower()
|
||||
|
||||
|
||||
def cleanTitle(title):
|
||||
|
||||
48
headphones/helpers_test.py
Normal file
48
headphones/helpers_test.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from unittestcompat import TestCase
|
||||
from headphones.helpers import clean_name
|
||||
|
||||
|
||||
class HelpersTest(TestCase):
    """Tests for ``clean_name`` from ``headphones.helpers``."""

    def test_clean_name(self):
        """helpers: check correctness of clean_name() function"""
        # Each key must clean to the same text as its value once both sides
        # have gone through clean_name() and been lowercased.
        cases = {
            u' Weiße & rose ': 'Weisse and rose',
            u'Multiple / spaces': 'Multiple spaces',
            u'Kevin\'s m²': 'Kevins m2',
            u'Symphonęy Nº9': 'Symphoney No.9',
            u'ÆæßðÞIJij': u'AeaessdThIJıj',
            u'Obsessió (Cerebral Apoplexy remix)': 'obsessio cerebral '
                                                   'apoplexy remix',
            u'Doktór Hałabała i siedmiu zbojów': 'doktor halabala i siedmiu '
                                                 'zbojow',
            u'Arbetets Söner och Döttrar': 'arbetets soner och dottrar',
            u'Björk Guðmundsdóttir': 'bjork gudmundsdottir',
            u'L\'Arc~en~Ciel': 'larc en ciel',
            u'Orquesta de la Luz (オルケスタ・デ・ラ・ルス)':
                u'Orquesta de la Luz オルケスタ デ ラ ルス'
        }
        for raw, equivalent in cases.iteritems():
            cleaned_raw = clean_name(raw).lower()
            cleaned_equivalent = clean_name(equivalent).lower()
            message = u"check cleaning of case (%s,%s)" % (
                cleaned_raw, cleaned_equivalent)
            self.assertEqual(cleaned_raw, cleaned_equivalent, message)

    def test_clean_name_nonunicode(self):
        """helpers: check if clean_name() works on non-unicode input"""
        # plain ascii narrow string
        ascii_result = clean_name('foo $ bar/BAZ').lower()
        self.assertEqual(
            ascii_result, 'foo bar baz',
            "check clean_name() works on non-unicode"
        )

        # narrow string containing non-ascii characters
        non_ascii_result = clean_name('fóó $ BAZ').lower()
        reference = clean_name('%fóó baz ').lower()
        self.assertEqual(
            non_ascii_result, reference,
            "check clean_name() with narrow non-ascii input"
        )
|
||||
@@ -374,7 +374,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"):
|
||||
|
||||
for track in hybridrelease['Tracks']:
|
||||
|
||||
cleanname = helpers.cleanName(
|
||||
cleanname = helpers.clean_name(
|
||||
artist['artist_name'] + ' ' + rg['title'] + ' ' + track['title'])
|
||||
|
||||
controlValueDict = {"TrackID": track['id'],
|
||||
@@ -710,7 +710,7 @@ def addReleaseById(rid, rgid=None):
|
||||
myDB.action('INSERT INTO releases VALUES( ?, ?)', [rid, release_dict['rgid']])
|
||||
|
||||
for track in release_dict['tracks']:
|
||||
cleanname = helpers.cleanName(
|
||||
cleanname = helpers.clean_name(
|
||||
release_dict['artist_name'] + ' ' + release_dict['rg_title'] + ' ' + track['title'])
|
||||
|
||||
controlValueDict = {"TrackID": track['id'],
|
||||
|
||||
@@ -138,7 +138,7 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None,
|
||||
# TODO: skip adding songs without the minimum requisite information (just a matter of putting together the right if statements)
|
||||
|
||||
if f_artist and f.album and f.title:
|
||||
CleanName = helpers.cleanName(f_artist + ' ' + f.album + ' ' + f.title)
|
||||
CleanName = helpers.clean_name(f_artist + ' ' + f.album + ' ' + f.title)
|
||||
else:
|
||||
CleanName = None
|
||||
|
||||
@@ -332,15 +332,15 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None,
|
||||
# There was a bug where artists with special characters (-,') would show up in new artists.
|
||||
artist_list = [
|
||||
x for x in unique_artists
|
||||
if helpers.cleanName(x).lower() not in [
|
||||
helpers.cleanName(y[0]).lower()
|
||||
if helpers.clean_name(x).lower() not in [
|
||||
helpers.clean_name(y[0]).lower()
|
||||
for y in current_artists
|
||||
]
|
||||
]
|
||||
artists_checked = [
|
||||
x for x in unique_artists
|
||||
if helpers.cleanName(x).lower() in [
|
||||
helpers.cleanName(y[0]).lower()
|
||||
if helpers.clean_name(x).lower() in [
|
||||
helpers.clean_name(y[0]).lower()
|
||||
for y in current_artists
|
||||
]
|
||||
]
|
||||
|
||||
@@ -637,7 +637,7 @@ def get_new_releases(rgid, includeExtras=False, forcefull=False):
|
||||
|
||||
for track in release['Tracks']:
|
||||
|
||||
cleanname = helpers.cleanName(
|
||||
cleanname = helpers.clean_name(
|
||||
release['ArtistName'] + ' ' + release['AlbumTitle'] + ' ' + track['title'])
|
||||
|
||||
controlValueDict = {"TrackID": track['id'],
|
||||
|
||||
@@ -29,7 +29,7 @@ import urllib2
|
||||
import os
|
||||
import re
|
||||
from headphones import logger, searcher, db, importer, mb, lastfm, librarysync, helpers, notifiers
|
||||
from headphones.helpers import checked, radio, today, cleanName
|
||||
from headphones.helpers import checked, radio, today, clean_name
|
||||
from mako.lookup import TemplateLookup
|
||||
from mako import exceptions
|
||||
import headphones
|
||||
@@ -577,7 +577,7 @@ class WebInterface(object):
|
||||
for albums in have_albums:
|
||||
# Have to skip over manually matched tracks
|
||||
if albums['ArtistName'] and albums['AlbumTitle'] and albums['TrackTitle']:
|
||||
original_clean = helpers.cleanName(
|
||||
original_clean = helpers.clean_name(
|
||||
albums['ArtistName'] + " " + albums['AlbumTitle'] + " " + albums['TrackTitle'])
|
||||
# else:
|
||||
# original_clean = None
|
||||
@@ -595,10 +595,12 @@ class WebInterface(object):
|
||||
# unmatchedalbums = [f for f in have_album_dictionary if f not in [x for x in headphones_album_dictionary]]
|
||||
|
||||
check = set(
|
||||
[(cleanName(d['ArtistName']).lower(), cleanName(d['AlbumTitle']).lower()) for d in
|
||||
[(clean_name(d['ArtistName']).lower(),
|
||||
clean_name(d['AlbumTitle']).lower()) for d in
|
||||
headphones_album_dictionary])
|
||||
unmatchedalbums = [d for d in have_album_dictionary if (
|
||||
cleanName(d['ArtistName']).lower(), cleanName(d['AlbumTitle']).lower()) not in check]
|
||||
clean_name(d['ArtistName']).lower(),
|
||||
clean_name(d['AlbumTitle']).lower()) not in check]
|
||||
|
||||
return serve_template(templatename="manageunmatched.html", title="Manage Unmatched Items",
|
||||
unmatchedalbums=unmatchedalbums)
|
||||
@@ -622,8 +624,8 @@ class WebInterface(object):
|
||||
(artist, album))
|
||||
|
||||
elif action == "matchArtist":
|
||||
existing_artist_clean = helpers.cleanName(existing_artist).lower()
|
||||
new_artist_clean = helpers.cleanName(new_artist).lower()
|
||||
existing_artist_clean = helpers.clean_name(existing_artist).lower()
|
||||
new_artist_clean = helpers.clean_name(new_artist).lower()
|
||||
if new_artist_clean != existing_artist_clean:
|
||||
have_tracks = myDB.action(
|
||||
'SELECT Matched, CleanName, Location, BitRate, Format FROM have WHERE ArtistName=?',
|
||||
@@ -668,10 +670,10 @@ class WebInterface(object):
|
||||
"Artist %s already named appropriately; nothing to modify" % existing_artist)
|
||||
|
||||
elif action == "matchAlbum":
|
||||
existing_artist_clean = helpers.cleanName(existing_artist).lower()
|
||||
new_artist_clean = helpers.cleanName(new_artist).lower()
|
||||
existing_album_clean = helpers.cleanName(existing_album).lower()
|
||||
new_album_clean = helpers.cleanName(new_album).lower()
|
||||
existing_artist_clean = helpers.clean_name(existing_artist).lower()
|
||||
new_artist_clean = helpers.clean_name(new_artist).lower()
|
||||
existing_album_clean = helpers.clean_name(existing_album).lower()
|
||||
new_album_clean = helpers.clean_name(new_album).lower()
|
||||
existing_clean_string = existing_artist_clean + " " + existing_album_clean
|
||||
new_clean_string = new_artist_clean + " " + new_album_clean
|
||||
if existing_clean_string != new_clean_string:
|
||||
@@ -728,7 +730,7 @@ class WebInterface(object):
|
||||
'SELECT ArtistName, AlbumTitle, TrackTitle, CleanName, Matched from have')
|
||||
for albums in manualalbums:
|
||||
if albums['ArtistName'] and albums['AlbumTitle'] and albums['TrackTitle']:
|
||||
original_clean = helpers.cleanName(
|
||||
original_clean = helpers.clean_name(
|
||||
albums['ArtistName'] + " " + albums['AlbumTitle'] + " " + albums['TrackTitle'])
|
||||
if albums['Matched'] == "Ignored" or albums['Matched'] == "Manual" or albums[
|
||||
'CleanName'] != original_clean:
|
||||
@@ -769,7 +771,7 @@ class WebInterface(object):
|
||||
[artist])
|
||||
update_count = 0
|
||||
for tracks in update_clean:
|
||||
original_clean = helpers.cleanName(
|
||||
original_clean = helpers.clean_name(
|
||||
tracks['ArtistName'] + " " + tracks['AlbumTitle'] + " " + tracks[
|
||||
'TrackTitle']).lower()
|
||||
album = tracks['AlbumTitle']
|
||||
@@ -797,7 +799,7 @@ class WebInterface(object):
|
||||
(artist, album))
|
||||
update_count = 0
|
||||
for tracks in update_clean:
|
||||
original_clean = helpers.cleanName(
|
||||
original_clean = helpers.clean_name(
|
||||
tracks['ArtistName'] + " " + tracks['AlbumTitle'] + " " + tracks[
|
||||
'TrackTitle']).lower()
|
||||
track_title = tracks['TrackTitle']
|
||||
|
||||
Reference in New Issue
Block a user