# This file is part of the musicbrainzngs library
# Copyright (C) Alastair Porter, Adrian Sampson, and others
# This file is distributed under a BSD-2-Clause type license.
# See the COPYING file for more information.

import re
import xml.etree.ElementTree as ET
import logging

from musicbrainzngs import util

try:
    # NOTE: this resolves a top-level module literally named ``ET`` (not the
    # ``ET`` alias imported above); when no such module can be imported the
    # fallback definition below is used.
    from ET import fixtag
except ImportError:
    # Python < 2.7
    def fixtag(tag, namespaces):
        """Turn a decorated tag into a prefixed tag.

        `tag` is an ET.QName or a string of the form ``{uri}tag``.
        `namespaces` maps namespace URIs to prefixes; an unknown URI is
        assigned a generated ``nsN`` prefix and *added to this mapping*.

        Return a tuple ``("prefix:tag", xmlns)`` where `xmlns` is either
        None or a ``("xmlns:prefix", uri)`` declaration for a prefix that
        was newly generated by this call.
        """
        # given a decorated tag (of the form {uri}tag), return prefixed
        # tag and namespace declaration, if any
        if isinstance(tag, ET.QName):
            tag = tag.text
        namespace_uri, tag = tag[1:].split("}", 1)
        prefix = namespaces.get(namespace_uri)
        if prefix is None:
            prefix = "ns%d" % len(namespaces)
            namespaces[namespace_uri] = prefix
            if prefix == "xml":
                xmlns = None
            else:
                xmlns = ("xmlns:%s" % prefix, namespace_uri)
        else:
            xmlns = None
        return "%s:%s" % (prefix, tag), xmlns


# Namespace URI -> prefix used when flattening tag and attribute names.
NS_MAP = {"http://musicbrainz.org/ns/mmd-2.0#": "ws2",
          "http://musicbrainz.org/ns/ext#-2.0": "ext"}
_log = logging.getLogger("musicbrainzngs")


def make_artist_credit(artists):
    """Build the artist credit phrase from a parsed artist-credit list.

    Each item of `artists` is either a dict (its "name" is used if
    present, otherwise the "name" of its nested "artist" dict) or any
    other value (e.g. a join phrase string), which is used as is.
    The pieces are concatenated without a separator.
    """
    names = []
    for artist in artists:
        if isinstance(artist, dict):
            if "name" in artist:
                names.append(artist.get("name", ""))
            else:
                names.append(artist.get("artist", {}).get("name", ""))
        else:
            names.append(artist)
    return "".join(names)


def parse_elements(valid_els, inner_els, element):
    """ Extract single level subelements from an element.
        For example, given the element:
        <element><subelement>Text</subelement></element>
        and a list valid_els that contains "subelement",
        return a dict {'subelement': 'Text'}

        Delegate the parsing of multi-level subelements to another function.
        For example, given the element:
        <element><subelement><a>Foo</a><b>Bar</b></subelement></element>
        and a dictionary {'subelement': parse_subelement},
        call parse_subelement(<subelement>) and
        return a dict {'subelement': <result>}
        if parse_subelement returns a tuple of the form
        ('subelement-key', <result>) then return a dict
        {'subelement-key': <result>} instead

        Subelement tags are expected in ElementTree's ``{uri}tag`` form;
        the namespace prefix is dropped before the lookup. A name found in
        valid_els takes precedence over the same name in inner_els.
        For a delegated ``<xxx-list count="N">`` subelement, an additional
        key 'xxx-count' holding int(N) is added.
        Subelements matching neither collection are logged and skipped.
    """
    result = {}
    for sub in element:
        t = fixtag(sub.tag, NS_MAP)[0]
        if ":" in t:
            t = t.split(":")[1]
        if t in valid_els:
            # An empty element has text None; normalise it to "".
            result[t] = sub.text or ""
        elif t in inner_els:
            inner_result = inner_els[t](sub)
            if isinstance(inner_result, tuple):
                result[inner_result[0]] = inner_result[1]
            else:
                result[t] = inner_result
            # add counts for lists when available
            m = re.match(r'([a-z0-9-]+)-list', t)
            if m and "count" in sub.attrib:
                result["%s-count" % m.group(1)] = int(sub.attrib["count"])
        else:
            _log.info("in <%s>, uncaught <%s>",
                      fixtag(element.tag, NS_MAP)[0], t)
    return result


def parse_attributes(attributes, element):
    """ Extract attributes from an element.
        For example, given the element:
        <element type="Group" />
        and a list attributes that contains "type",
        return a dict {'type': 'Group'}

        Namespaced attributes (``{uri}name``) are matched by their
        prefixed form, e.g. "ext:score". Attributes that are not listed
        are logged and skipped.
    """
    result = {}
    for attr in element.attrib:
        if "{" in attr:
            a = fixtag(attr, NS_MAP)[0]
        else:
            a = attr
        if a in attributes:
            result[a] = element.attrib[attr]
        else:
            _log.info("in <%s>, uncaught attribute %s",
                      fixtag(element.tag, NS_MAP)[0], attr)
    return result


def parse_message(message):
    """Parse a web service response into a dict.

    `message` is handed to util.bytes_to_elementtree(); the children of
    the resulting root element are dispatched to the matching parse_*
    function and stored under their tag name.
    """
    tree = util.bytes_to_elementtree(message)
    root = tree.getroot()
    result = {}
    valid_elements = {"area": parse_area,
                      "artist": parse_artist,
                      "label": parse_label,
                      "place": parse_place,
                      "release": parse_release,
                      "release-group": parse_release_group,
                      "recording": parse_recording,
                      "work": parse_work,
                      "url": parse_url,

                      "disc": parse_disc,
                      "cdstub": parse_cdstub,
                      "isrc": parse_isrc,

                      "annotation-list": parse_annotation_list,
                      "area-list": parse_area_list,
                      "artist-list": parse_artist_list,
                      "label-list": parse_label_list,
                      "place-list": parse_place_list,
                      "release-list": parse_release_list,
                      "release-group-list": parse_release_group_list,
                      "recording-list": parse_recording_list,
                      "work-list": parse_work_list,
                      "url-list": parse_url_list,

                      "collection-list": parse_collection_list,
                      "collection": parse_collection,

                      "message": parse_response_message
                      }
    result.update(parse_elements([], valid_elements, root))
    return result


def parse_response_message(message):
    """Return a dict with the "text" subelement of `message`, if any."""
    return parse_elements(["text"], {}, message)


def parse_collection_list(cl):
    """Parse every child of `cl` with parse_collection()."""
    return [parse_collection(c) for c in cl]


def parse_collection(collection):
    """Parse a collection element into a dict."""
    result = {}
    attribs = ["id"]
    elements = ["name", "editor"]
    inner_els = {"release-list": parse_release_list}
    result.update(parse_attributes(attribs, collection))
    result.update(parse_elements(elements, inner_els, collection))

    return result


def parse_annotation_list(al):
    """Parse every child of `al` with parse_annotation()."""
    return [parse_annotation(a) for a in al]


def parse_annotation(annotation):
    """Parse an annotation element into a dict."""
    result = {}
    attribs = ["type", "ext:score"]
    elements = ["entity", "name", "text"]
    result.update(parse_attributes(attribs, annotation))
    result.update(parse_elements(elements, {}, annotation))
    return result


def parse_lifespan(lifespan):
    """Return a dict with the begin/end/ended subelements of `lifespan`."""
    parts = parse_elements(["begin", "end", "ended"], {}, lifespan)

    return parts


def parse_area_list(al):
    """Parse every child of `al` with parse_area()."""
    return [parse_area(a) for a in al]


def parse_area(area):
    """Parse an area element into a dict."""
    result = {}
    attribs = ["id", "type", "ext:score"]
    elements = ["name", "sort-name", "disambiguation"]
    inner_els = {"life-span": parse_lifespan,
                 "alias-list": parse_alias_list,
                 "relation-list": parse_relation_list,
                 "annotation": parse_annotation,
                 "iso-3166-1-code-list": parse_element_list,
                 "iso-3166-2-code-list": parse_element_list,
                 "iso-3166-3-code-list": parse_element_list}

    result.update(parse_attributes(attribs, area))
    result.update(parse_elements(elements, inner_els, area))

    return result


def parse_artist_list(al):
    """Parse every child of `al` with parse_artist()."""
    return [parse_artist(a) for a in al]


def parse_artist(artist):
    """Parse an artist element into a dict."""
    result = {}
    attribs = ["id", "type", "ext:score"]
    elements = ["name", "sort-name", "country", "user-rating",
                "disambiguation", "gender", "ipi"]
    inner_els = {"area": parse_area,
                 "begin-area": parse_area,
                 "end-area": parse_area,
                 "life-span": parse_lifespan,
                 "recording-list": parse_recording_list,
                 "relation-list": parse_relation_list,
                 "release-list": parse_release_list,
                 "release-group-list": parse_release_group_list,
                 "work-list": parse_work_list,
                 "tag-list": parse_tag_list,
                 "user-tag-list": parse_tag_list,
                 "rating": parse_rating,
                 "ipi-list": parse_element_list,
                 "isni-list": parse_element_list,
                 "alias-list": parse_alias_list,
                 "annotation": parse_annotation}

    result.update(parse_attributes(attribs, artist))
    result.update(parse_elements(elements, inner_els, artist))

    return result


def parse_coordinates(c):
    """Return a dict with the latitude/longitude subelements of `c`."""
    return parse_elements(['latitude', 'longitude'], {}, c)


def parse_place_list(pl):
    """Parse every child of `pl` with parse_place()."""
    return [parse_place(p) for p in pl]


def parse_place(place):
    """Parse a place element into a dict."""
    result = {}
    attribs = ["id", "type", "ext:score"]
    elements = ["name", "address", "ipi", "disambiguation"]
    inner_els = {"area": parse_area,
                 "coordinates": parse_coordinates,
                 "life-span": parse_lifespan,
                 "tag-list": parse_tag_list,
                 "user-tag-list": parse_tag_list,
                 "alias-list": parse_alias_list,
                 "relation-list": parse_relation_list,
                 "annotation": parse_annotation}

    result.update(parse_attributes(attribs, place))
    result.update(parse_elements(elements, inner_els, place))

    return result


def parse_label_list(ll):
    """Parse every child of `ll` with parse_label()."""
    return [parse_label(l) for l in ll]


def parse_label(label):
    """Parse a label element into a dict."""
    result = {}
    attribs = ["id", "type", "ext:score"]
    elements = ["name", "sort-name", "country", "label-code", "user-rating",
                "ipi", "disambiguation"]
    inner_els = {"area": parse_area,
                 "life-span": parse_lifespan,
                 "release-list": parse_release_list,
                 "tag-list": parse_tag_list,
                 "user-tag-list": parse_tag_list,
                 "rating": parse_rating,
                 "ipi-list": parse_element_list,
                 "alias-list": parse_alias_list,
                 "relation-list": parse_relation_list,
                 "annotation": parse_annotation}

    result.update(parse_attributes(attribs, label))
    result.update(parse_elements(elements, inner_els, label))

    return result


def parse_relation_target(tgt):
    """Return ('target-id', value) for a relation target element.

    The value is the element's "id" attribute when present, otherwise
    its text.
    """
    attributes = parse_attributes(['id'], tgt)
    if 'id' in attributes:
        return ('target-id', attributes['id'])
    else:
        return ('target-id', tgt.text)


def parse_relation_list(rl):
    """Parse a relation-list element.

    Return a tuple ('<target-type>-relation-list', [relations]) so that
    parse_elements() stores the list under a key that depends on the
    element's "target-type" attribute. Raises KeyError when that
    attribute is missing.
    """
    attribs = ["target-type"]
    ttype = parse_attributes(attribs, rl)
    key = "%s-relation-list" % ttype["target-type"]
    return (key, [parse_relation(r) for r in rl])


def parse_relation(relation):
    """Parse a relation element into a dict."""
    result = {}
    attribs = ["type", "type-id"]
    elements = ["target", "direction", "begin", "end", "ended"]
    # NOTE: "target" is also listed in `elements`, and parse_elements()
    # checks plain elements first, so the parse_relation_target entry below
    # is never reached; the target ends up as text under the "target" key.
    inner_els = {"area": parse_area,
                 "artist": parse_artist,
                 "label": parse_label,
                 "place": parse_place,
                 "recording": parse_recording,
                 "release": parse_release,
                 "release-group": parse_release_group,
                 "attribute-list": parse_element_list,
                 "work": parse_work,
                 "target": parse_relation_target
                 }
    result.update(parse_attributes(attribs, relation))
    result.update(parse_elements(elements, inner_els, relation))

    return result


def parse_release(release):
    """Parse a release element into a dict.

    When an artist credit is present, "artist-credit-phrase" is added
    with the flattened credit string.
    """
    result = {}
    attribs = ["id", "ext:score"]
    elements = ["title", "status", "disambiguation", "quality", "country",
                "barcode", "date", "packaging", "asin"]
    inner_els = {"text-representation": parse_text_representation,
                 "artist-credit": parse_artist_credit,
                 "label-info-list": parse_label_info_list,
                 "medium-list": parse_medium_list,
                 "release-group": parse_release_group,
                 "relation-list": parse_relation_list,
                 "annotation": parse_annotation,
                 "cover-art-archive": parse_caa,
                 "release-event-list": parse_release_event_list}

    result.update(parse_attributes(attribs, release))
    result.update(parse_elements(elements, inner_els, release))
    if "artist-credit" in result:
        result["artist-credit-phrase"] = make_artist_credit(
            result["artist-credit"])

    return result


def parse_medium_list(ml):
    """Parse every child of `ml` with parse_medium()."""
    return [parse_medium(m) for m in ml]


def parse_release_event_list(rel):
    """Parse every child of `rel` with parse_release_event()."""
    # The loop variable must not be called `re`: that would shadow the
    # imported regular expression module.
    return [parse_release_event(event) for event in rel]


def parse_release_event(event):
    """Parse a release event element (date and area) into a dict."""
    result = {}
    elements = ["date"]
    inner_els = {"area": parse_area}

    result.update(parse_elements(elements, inner_els, event))
    return result


def parse_medium(medium):
    """Parse a medium element into a dict."""
    result = {}
    elements = ["position", "format", "title"]
    inner_els = {"disc-list": parse_disc_list,
                 "track-list": parse_track_list}

    result.update(parse_elements(elements, inner_els, medium))
    return result


def parse_disc_list(dl):
    """Parse every child of `dl` with parse_disc()."""
    return [parse_disc(d) for d in dl]


def parse_text_representation(textr):
    """Return a dict with the language/script subelements of `textr`."""
    return parse_elements(["language", "script"], {}, textr)


def parse_release_group(rg):
    """Parse a release-group element into a dict.

    When an artist credit is present, "artist-credit-phrase" is added
    with the flattened credit string.
    """
    result = {}
    attribs = ["id", "type", "ext:score"]
    elements = ["title", "user-rating", "first-release-date", "primary-type",
                "disambiguation"]
    inner_els = {"artist-credit": parse_artist_credit,
                 "release-list": parse_release_list,
                 "tag-list": parse_tag_list,
                 "user-tag-list": parse_tag_list,
                 "secondary-type-list": parse_element_list,
                 "relation-list": parse_relation_list,
                 "rating": parse_rating,
                 "annotation": parse_annotation}

    result.update(parse_attributes(attribs, rg))
    result.update(parse_elements(elements, inner_els, rg))
    if "artist-credit" in result:
        result["artist-credit-phrase"] = make_artist_credit(
            result["artist-credit"])

    return result


def parse_recording(recording):
    """Parse a recording element into a dict.

    When an artist credit is present, "artist-credit-phrase" is added
    with the flattened credit string.
    """
    result = {}
    attribs = ["id", "ext:score"]
    elements = ["title", "length", "user-rating", "disambiguation", "video"]
    inner_els = {"artist-credit": parse_artist_credit,
                 "release-list": parse_release_list,
                 "tag-list": parse_tag_list,
                 "user-tag-list": parse_tag_list,
                 "rating": parse_rating,
                 "isrc-list": parse_external_id_list,
                 "echoprint-list": parse_external_id_list,
                 "relation-list": parse_relation_list,
                 "annotation": parse_annotation}

    result.update(parse_attributes(attribs, recording))
    result.update(parse_elements(elements, inner_els, recording))
    if "artist-credit" in result:
        result["artist-credit-phrase"] = make_artist_credit(
            result["artist-credit"])

    return result


def parse_external_id_list(pl):
    """Return the "id" attribute of every child of `pl`.

    Raises KeyError if a child has no "id" attribute.
    """
    return [parse_attributes(["id"], p)["id"] for p in pl]


def parse_element_list(el):
    """Return the text of every child of `el`."""
    return [e.text for e in el]


def parse_work_list(wl):
    """Parse every child of `wl` with parse_work()."""
    return [parse_work(w) for w in wl]


def parse_work(work):
    """Parse a work element into a dict."""
    result = {}
    attribs = ["id", "ext:score", "type"]
    elements = ["title", "user-rating", "language", "iswc", "disambiguation"]
    # The annotation is parsed with parse_annotation like for every other
    # entity; for an annotation that only has a <text> child this yields
    # the same {"text": ...} dict as parse_response_message would.
    inner_els = {"tag-list": parse_tag_list,
                 "user-tag-list": parse_tag_list,
                 "rating": parse_rating,
                 "alias-list": parse_alias_list,
                 "iswc-list": parse_element_list,
                 "relation-list": parse_relation_list,
                 "annotation": parse_annotation}

    result.update(parse_attributes(attribs, work))
    result.update(parse_elements(elements, inner_els, work))

    return result


def parse_url_list(ul):
    """Parse every child of `ul` with parse_url()."""
    return [parse_url(u) for u in ul]


def parse_url(url):
    """Parse a url element into a dict."""
    result = {}
    attribs = ["id"]
    elements = ["resource"]
    inner_els = {"relation-list": parse_relation_list}

    result.update(parse_attributes(attribs, url))
    result.update(parse_elements(elements, inner_els, url))

    return result


def parse_disc(disc):
    """Parse a disc element into a dict."""
    result = {}
    attribs = ["id"]
    elements = ["sectors"]
    inner_els = {"release-list": parse_release_list}

    result.update(parse_attributes(attribs, disc))
    result.update(parse_elements(elements, inner_els, disc))

    return result


def parse_cdstub(cdstub):
    """Parse a cdstub element into a dict."""
    result = {}
    attribs = ["id"]
    elements = ["title", "artist", "barcode"]
    inner_els = {"track-list": parse_track_list}

    result.update(parse_attributes(attribs, cdstub))
    result.update(parse_elements(elements, inner_els, cdstub))

    return result


def parse_release_list(rl):
    """Parse every child of `rl` with parse_release()."""
    return [parse_release(r) for r in rl]


def parse_release_group_list(rgl):
    """Parse every child of `rgl` with parse_release_group()."""
    return [parse_release_group(rg) for rg in rgl]


def parse_isrc(isrc):
    """Parse an isrc element into a dict."""
    result = {}
    attribs = ["id"]
    inner_els = {"recording-list": parse_recording_list}

    result.update(parse_attributes(attribs, isrc))
    result.update(parse_elements([], inner_els, isrc))

    return result


def parse_recording_list(recs):
    """Parse every child of `recs` with parse_recording()."""
    return [parse_recording(r) for r in recs]


def parse_artist_credit(ac):
    """Parse an artist-credit element into a flat list.

    The list contains one dict per name credit, each followed by its
    "joinphrase" attribute (a string) when the name credit has one.
    """
    result = []
    for namecredit in ac:
        result.append(parse_name_credit(namecredit))
        join = parse_attributes(["joinphrase"], namecredit)
        if "joinphrase" in join:
            result.append(join["joinphrase"])
    return result


def parse_name_credit(nc):
    """Parse a name-credit element into a dict."""
    result = {}
    elements = ["name"]
    inner_els = {"artist": parse_artist}

    result.update(parse_elements(elements, inner_els, nc))

    return result


def parse_label_info_list(lil):
    """Parse every child of `lil` with parse_label_info()."""
    return [parse_label_info(li) for li in lil]


def parse_label_info(li):
    """Parse a label-info element into a dict."""
    result = {}
    elements = ["catalog-number"]
    inner_els = {"label": parse_label}

    result.update(parse_elements(elements, inner_els, li))
    return result


def parse_track_list(tl):
    """Parse every child of `tl` with parse_track()."""
    return [parse_track(t) for t in tl]


def parse_track(track):
    """Parse a track element into a dict.

    A track without its own artist credit inherits the one of its
    recording. "track_or_recording_length" is set to the track length,
    falling back to the recording length, when either is non-empty.
    """
    result = {}
    attribs = ["id"]
    elements = ["number", "position", "title", "length"]
    inner_els = {"recording": parse_recording,
                 "artist-credit": parse_artist_credit}

    result.update(parse_attributes(attribs, track))
    result.update(parse_elements(elements, inner_els, track))
    if ("artist-credit" in result.get("recording", {})
            and "artist-credit" not in result):
        result["artist-credit"] = result["recording"]["artist-credit"]
    if "artist-credit" in result:
        result["artist-credit-phrase"] = make_artist_credit(
            result["artist-credit"])
    # Make a length field that contains track length or recording length
    track_or_recording = None
    if "length" in result:
        track_or_recording = result["length"]
    elif result.get("recording", {}).get("length"):
        track_or_recording = result.get("recording", {}).get("length")
    if track_or_recording:
        result["track_or_recording_length"] = track_or_recording
    return result


def parse_tag_list(tl):
    """Parse every child of `tl` with parse_tag()."""
    return [parse_tag(t) for t in tl]


def parse_tag(tag):
    """Parse a tag element (count attribute and name) into a dict."""
    result = {}
    attribs = ["count"]
    elements = ["name"]

    result.update(parse_attributes(attribs, tag))
    result.update(parse_elements(elements, {}, tag))

    return result


def parse_rating(rating):
    """Parse a rating element into a dict.

    The element text is stored under "rating", next to the
    "votes-count" attribute when present.
    """
    result = {}
    attribs = ["votes-count"]

    result.update(parse_attributes(attribs, rating))
    result["rating"] = rating.text

    return result


def parse_alias_list(al):
    """Parse every child of `al` with parse_alias()."""
    return [parse_alias(a) for a in al]


def parse_alias(alias):
    """Parse an alias element into a dict.

    The element text is stored under "alias", next to the listed
    attributes that are present.
    """
    result = {}
    attribs = ["locale", "sort-name", "type", "primary",
               "begin-date", "end-date"]

    result.update(parse_attributes(attribs, alias))
    result["alias"] = alias.text

    return result


def parse_caa(caa_element):
    """Parse a cover-art-archive element into a dict."""
    result = {}
    elements = ["artwork", "count", "front", "back", "darkened"]

    result.update(parse_elements(elements, {}, caa_element))
    return result


###

def make_barcode_request(release2barcode):
    """Serialize a release -> barcode mapping as a metadata document.

    `release2barcode` maps release ids to barcodes. Return the result
    of ET.tostring(root, "utf-8").
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    root = ET.Element("{%s}metadata" % NS)
    rel_list = ET.SubElement(root, "{%s}release-list" % NS)
    for release, barcode in release2barcode.items():
        rel_xml = ET.SubElement(rel_list, "{%s}release" % NS)
        bar_xml = ET.SubElement(rel_xml, "{%s}barcode" % NS)
        rel_xml.set("{%s}id" % NS, release)
        bar_xml.text = barcode

    return ET.tostring(root, "utf-8")


def make_tag_request(artist2tags, recording2tags):
    """Serialize user tags as a metadata document.

    `artist2tags` and `recording2tags` map entity ids to iterables of
    tag names. The recording-list is emitted before the artist-list.
    Return the result of ET.tostring(root, "utf-8").
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    root = ET.Element("{%s}metadata" % NS)
    rec_list = ET.SubElement(root, "{%s}recording-list" % NS)
    for rec, tags in recording2tags.items():
        rec_xml = ET.SubElement(rec_list, "{%s}recording" % NS)
        rec_xml.set("{%s}id" % NS, rec)
        taglist = ET.SubElement(rec_xml, "{%s}user-tag-list" % NS)
        for tag in tags:
            usertag_xml = ET.SubElement(taglist, "{%s}user-tag" % NS)
            name_xml = ET.SubElement(usertag_xml, "{%s}name" % NS)
            name_xml.text = tag
    art_list = ET.SubElement(root, "{%s}artist-list" % NS)
    for art, tags in artist2tags.items():
        art_xml = ET.SubElement(art_list, "{%s}artist" % NS)
        art_xml.set("{%s}id" % NS, art)
        taglist = ET.SubElement(art_xml, "{%s}user-tag-list" % NS)
        for tag in tags:
            usertag_xml = ET.SubElement(taglist, "{%s}user-tag" % NS)
            name_xml = ET.SubElement(usertag_xml, "{%s}name" % NS)
            name_xml.text = tag

    return ET.tostring(root, "utf-8")


def make_rating_request(artist2rating, recording2rating):
    """Serialize user ratings as a metadata document.

    `artist2rating` and `recording2rating` map entity ids to ratings,
    which are written as str(rating). The recording-list is emitted
    before the artist-list.
    Return the result of ET.tostring(root, "utf-8").
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    root = ET.Element("{%s}metadata" % NS)
    rec_list = ET.SubElement(root, "{%s}recording-list" % NS)
    for rec, rating in recording2rating.items():
        rec_xml = ET.SubElement(rec_list, "{%s}recording" % NS)
        rec_xml.set("{%s}id" % NS, rec)
        rating_xml = ET.SubElement(rec_xml, "{%s}user-rating" % NS)
        rating_xml.text = str(rating)
    art_list = ET.SubElement(root, "{%s}artist-list" % NS)
    for art, rating in artist2rating.items():
        art_xml = ET.SubElement(art_list, "{%s}artist" % NS)
        art_xml.set("{%s}id" % NS, art)
        rating_xml = ET.SubElement(art_xml, "{%s}user-rating" % NS)
        rating_xml.text = str(rating)

    return ET.tostring(root, "utf-8")


def make_isrc_request(recording2isrcs):
    """Serialize a recording -> ISRCs mapping as a metadata document.

    `recording2isrcs` maps recording ids to sequences of ISRCs;
    recordings with an empty sequence are left out.
    Return the result of ET.tostring(root, "utf-8").
    """
    NS = "http://musicbrainz.org/ns/mmd-2.0#"
    root = ET.Element("{%s}metadata" % NS)
    rec_list = ET.SubElement(root, "{%s}recording-list" % NS)
    for rec, isrcs in recording2isrcs.items():
        if len(isrcs) > 0:
            rec_xml = ET.SubElement(rec_list, "{%s}recording" % NS)
            rec_xml.set("{%s}id" % NS, rec)
            isrc_list_xml = ET.SubElement(rec_xml, "{%s}isrc-list" % NS)
            isrc_list_xml.set("{%s}count" % NS, str(len(isrcs)))
            for isrc in isrcs:
                isrc_xml = ET.SubElement(isrc_list_xml, "{%s}isrc" % NS)
                isrc_xml.set("{%s}id" % NS, isrc)
    return ET.tostring(root, "utf-8")